Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add external node table #4105

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions extension/duckdb/test/test_files/external_table.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-DATASET CSV empty

--

-CASE ExternalDuckDBTable

-STATEMENT load extension "${KUZU_ROOT_DIRECTORY}/extension/duckdb/build/libduckdb.kuzu_extension"
---- ok
-STATEMENT ATTACH '${KUZU_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as tinysnb (dbtype duckdb, skip_unsupported_table = true);
---- 1
Attached database successfully.
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT CREATE EXTERNAL NODE TABLE duck_person AS tinysnb.person (PRIMARY KEY (ID));
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 1
duck_person|EXTERNAL_NODE|local(kuzu)|
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT ALTER TABLE duck_person RENAME TO d_person;
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 1
d_person|EXTERNAL_NODE|local(kuzu)|
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT DROP TABLE d_person;
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 0
-STATEMENT load extension "${KUZU_ROOT_DIRECTORY}/extension/duckdb/build/libduckdb.kuzu_extension"
---- ok
-STATEMENT ATTACH '${KUZU_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as tinysnb (dbtype duckdb, skip_unsupported_table = true);
---- 1
Attached database successfully.
-STATEMENT CREATE EXTERNAL NODE TABLE duck_person AS tinysnb.person (PRIMARY KEY (ID));
---- 1
Table duck_person has been created.
-STATEMENT COPY duck_person FROM (LOAD FROM tinysnb.person RETURN ID);
---- ok
-STATEMENT MATCH (a:duck_person) RETURN a.ID, a.fName;
---- 8
0|Alice
2|Bob
3|Carol
5|Dan
7|Elizabeth
8|Farooq
9|Greg
10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
23 changes: 14 additions & 9 deletions scripts/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ EXPORT : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'P' | 'p' ) ( 'O' | 'o' ) ( 'R' | 'r' ) (

EXTENSION : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'T' | 't' ) ( 'E' | 'e' ) ( 'N' | 'n' ) ( 'S' | 's' ) ( 'I' | 'i' ) ( 'O' | 'o' ) ( 'N' | 'n' ) ;

EXTERNAL : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'T' | 't' ) ( 'E' | 'e' ) ( 'R' | 'r' ) ( 'N' | 'n' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

FALSE : ( 'F' | 'f' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ( 'S' | 's' ) ( 'E' | 'e' ) ;

FROM : ( 'F' | 'f' ) ( 'R' | 'r' ) ( 'O' | 'o' ) ( 'M' | 'm' ) ;
Expand Down Expand Up @@ -231,6 +233,7 @@ oC_Cypher
oC_Statement
: oC_Query
| kU_CreateNodeTable
| kU_CreateExternalNodeTable
| kU_CreateRelTable
| kU_CreateRelTableGroup
| kU_CreateRdfGraph
Expand Down Expand Up @@ -318,10 +321,13 @@ kU_IfNotExists
: IF SP NOT SP EXISTS ;

kU_CreateNodeTable
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint ) SP? ')' ;
: CREATE SP NODE SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_PrimaryKey ) SP? ')' ;

kU_CreateExternalNodeTable
: CREATE SP EXTERNAL SP NODE SP TABLE SP oC_SchemaName SP AS SP oC_SchemaName kU_TableLookup SP? '(' SP? kU_PrimaryKey SP? ')' ;

kU_CreateRelTable
: CREATE SP REL SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
: CREATE SP REL SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

kU_CreateRelTableGroup
: CREATE SP REL SP TABLE SP GROUP SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection ( SP? ',' SP? kU_RelTableConnection )+ SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
Expand All @@ -330,10 +336,10 @@ kU_RelTableConnection
: FROM SP oC_SchemaName SP TO SP oC_SchemaName ;

kU_CreateRdfGraph
: CREATE SP RDFGRAPH SP (kU_IfNotExists SP)? oC_SchemaName ;
: CREATE SP RDFGRAPH SP ( kU_IfNotExists SP )? oC_SchemaName ;

kU_CreateSequence
: CREATE SP SEQUENCE SP (kU_IfNotExists SP)? oC_SchemaName (SP kU_SequenceOptions)* ;
: CREATE SP SEQUENCE SP ( kU_IfNotExists SP )? oC_SchemaName ( SP kU_SequenceOptions )* ;

kU_CreateType
: CREATE SP TYPE SP oC_SchemaName SP AS SP kU_DataType SP? ;
Expand Down Expand Up @@ -393,7 +399,7 @@ kU_PropertyDefinitions : kU_PropertyDefinition ( SP? ',' SP? kU_PropertyDefiniti

kU_PropertyDefinition : kU_ColumnDefinition ( SP kU_Default )? ;

kU_CreateNodeConstraint : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;
kU_PrimaryKey : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;

DECIMAL: ( 'D' | 'd' ) ( 'E' | 'e' ) ( 'C' | 'c' ) ( 'I' | 'i' ) ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

Expand Down Expand Up @@ -808,6 +814,9 @@ kU_CountSubquery
oC_PropertyLookup
: '.' SP? ( oC_PropertyKeyName | STAR ) ;

kU_TableLookup
: '.' SP? oC_SchemaName ;

oC_CaseExpression
: ( ( CASE ( SP? oC_CaseAlternative )+ ) | ( CASE SP? oC_Expression ( SP? oC_CaseAlternative )+ ) ) ( SP? ELSE SP? oC_Expression )? SP? END ;

Expand Down Expand Up @@ -954,10 +963,6 @@ kU_NonReservedKeywords
| TYPE
| USE
| WRITE
| SINGLE
| NONE
| ANY
| ALL
;

UnescapedSymbolicName
Expand Down
2 changes: 1 addition & 1 deletion scripts/antlr4/hash.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
675c2985175bbbf23811cbc12a11870e
2e49cc9ece0a0f04e6b8518c18f148fc
17 changes: 12 additions & 5 deletions src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ oC_Cypher
oC_Statement
: oC_Query
| kU_CreateNodeTable
| kU_CreateExternalNodeTable
| kU_CreateRelTable
| kU_CreateRelTableGroup
| kU_CreateRdfGraph
Expand Down Expand Up @@ -95,10 +96,13 @@ kU_IfNotExists
: IF SP NOT SP EXISTS ;

kU_CreateNodeTable
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint ) SP? ')' ;
: CREATE SP NODE SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_PrimaryKey ) SP? ')' ;

kU_CreateExternalNodeTable
: CREATE SP EXTERNAL SP NODE SP TABLE SP oC_SchemaName SP AS SP oC_SchemaName kU_TableLookup SP? '(' SP? kU_PrimaryKey SP? ')' ;

kU_CreateRelTable
: CREATE SP REL SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
: CREATE SP REL SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

kU_CreateRelTableGroup
: CREATE SP REL SP TABLE SP GROUP SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection ( SP? ',' SP? kU_RelTableConnection )+ SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
Expand All @@ -107,10 +111,10 @@ kU_RelTableConnection
: FROM SP oC_SchemaName SP TO SP oC_SchemaName ;

kU_CreateRdfGraph
: CREATE SP RDFGRAPH SP (kU_IfNotExists SP)? oC_SchemaName ;
: CREATE SP RDFGRAPH SP ( kU_IfNotExists SP )? oC_SchemaName ;

kU_CreateSequence
: CREATE SP SEQUENCE SP (kU_IfNotExists SP)? oC_SchemaName (SP kU_SequenceOptions)* ;
: CREATE SP SEQUENCE SP ( kU_IfNotExists SP )? oC_SchemaName ( SP kU_SequenceOptions )* ;

kU_CreateType
: CREATE SP TYPE SP oC_SchemaName SP AS SP kU_DataType SP? ;
Expand Down Expand Up @@ -170,7 +174,7 @@ kU_PropertyDefinitions : kU_PropertyDefinition ( SP? ',' SP? kU_PropertyDefiniti

kU_PropertyDefinition : kU_ColumnDefinition ( SP kU_Default )? ;

kU_CreateNodeConstraint : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;
kU_PrimaryKey : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;

DECIMAL: ( 'D' | 'd' ) ( 'E' | 'e' ) ( 'C' | 'c' ) ( 'I' | 'i' ) ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

Expand Down Expand Up @@ -585,6 +589,9 @@ kU_CountSubquery
oC_PropertyLookup
: '.' SP? ( oC_PropertyKeyName | STAR ) ;

kU_TableLookup
: '.' SP? oC_SchemaName ;

oC_CaseExpression
: ( ( CASE ( SP? oC_CaseAlternative )+ ) | ( CASE SP? oC_Expression ( SP? oC_CaseAlternative )+ ) ) ( SP? ELSE SP? oC_Expression )? SP? END ;

Expand Down
1 change: 1 addition & 0 deletions src/antlr4/keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ EXISTS
EXPLAIN
EXPORT
EXTENSION
EXTERNAL
FALSE
FROM
GLOB
Expand Down
1 change: 1 addition & 0 deletions src/binder/bind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(
bind_transaction.cpp
bind_updating_clause.cpp
bind_extension.cpp
bind_external.cpp
bind_export_database.cpp
bind_import_database.cpp
bind_use_database.cpp)
Expand Down
82 changes: 61 additions & 21 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,14 @@ static void validatePrimaryKey(const std::string& pkColName,
}
}

BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo* info) {
switch (info->tableType) {
BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo& info) {
switch (info.tableType) {
case TableType::NODE: {
return bindCreateNodeTableInfo(info);
}
case TableType::EXTERNAL_NODE: {
return bindCreateExternalNodeTableInfo(info);
}
case TableType::REL: {
return bindCreateRelTableInfo(info);
}
Expand All @@ -127,24 +130,61 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo*
}
}

BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo& info) {
auto propertyDefinitions = bindPropertyDefinitions(info.propertyDefinitions, info.tableName);
auto& extraInfo = info.extraInfo->constCast<ExtraCreateNodeTableInfo>();
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
std::move(propertyDefinitions));
return BoundCreateTableInfo(TableType::NODE, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::NODE, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

// Looks up the property definition called `name` in `definitions` and returns a copy of it.
// Throws a BinderException when no definition carries that name.
static PropertyDefinition getDefinition(const std::vector<PropertyDefinition>& definitions,
    const std::string& name) {
    for (auto it = definitions.begin(); it != definitions.end(); ++it) {
        if (it->getName() != name) {
            continue;
        }
        // First match wins; hand back an independent copy rather than a reference.
        return it->copy();
    }
    // LCOV_EXCL_START
    throw BinderException(stringFormat("Cannot find property with name {}.", name));
    // LCOV_EXCL_STOP
}

// Returns `name` prefixed with an underscore. In this file the result is used as the name of
// the physical node table that is created alongside an external node table.
// The argument is only read, so it is taken by const reference to avoid a needless copy
// (consistent with getRelGroupTableName's parameters).
static std::string getPhysicalTableName(const std::string& name) {
    return "_" + name;
}

BoundCreateTableInfo Binder::bindCreateExternalNodeTableInfo(const CreateTableInfo& info) {
auto& extraInfo = info.extraInfo->constCast<ExtraCreateExternalNodeTableInfo>();
auto entry = bindExternalTableEntry(extraInfo.dbName, extraInfo.tableName);
auto& propertyDefinitions = entry->getProperties();
// Bind physical create node table info
auto pkDefinition = getDefinition(propertyDefinitions, extraInfo.pkName);
std::vector<PropertyDefinition> physicalPropertyDefinitions;
physicalPropertyDefinitions.push_back(pkDefinition.copy());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to copy here? getDefinition returns a copied one already.

auto boundPhysicalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pkName,
std::move(physicalPropertyDefinitions));
auto boundPhysicalCreateInfo =
BoundCreateTableInfo(TableType::NODE, getPhysicalTableName(info.tableName),
ConflictAction::ON_CONFLICT_THROW, std::move(boundPhysicalExtraInfo));
// Bind create node table reference info
auto boundExtraInfo = std::make_unique<BoundExtraCreateExternalNodeTableInfo>(extraInfo.pkName,
extraInfo.dbName, extraInfo.tableName, std::move(boundPhysicalCreateInfo),
copyVector(propertyDefinitions));
return BoundCreateTableInfo(TableType::EXTERNAL_NODE, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info) {
BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo& info) {
std::vector<PropertyDefinition> propertyDefinitions;
propertyDefinitions.emplace_back(
ColumnDefinition(InternalKeyword::ID, LogicalType::INTERNAL_ID()));
for (auto& definition : bindPropertyDefinitions(info->propertyDefinitions, info->tableName)) {
for (auto& definition : bindPropertyDefinitions(info.propertyDefinitions, info.tableName)) {
propertyDefinitions.push_back(definition.copy());
}
auto& extraInfo = info->extraInfo->constCast<ExtraCreateRelTableInfo>();
auto& extraInfo = info.extraInfo->constCast<ExtraCreateRelTableInfo>();
auto srcMultiplicity = RelMultiplicityUtils::getFwd(extraInfo.relMultiplicity);
auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
auto srcTableID = bindTableID(extraInfo.srcTableName);
Expand All @@ -153,7 +193,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info)
validateTableType(dstTableID, TableType::NODE);
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableInfo>(srcMultiplicity,
dstMultiplicity, srcTableID, dstTableID, std::move(propertyDefinitions));
return BoundCreateTableInfo(TableType::REL, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::REL, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

Expand All @@ -162,29 +202,29 @@ static std::string getRelGroupTableName(const std::string& relGroupName,
return relGroupName + "_" + srcTableName + "_" + dstTableName;
}

BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* info) {
auto relGroupName = info->tableName;
auto& extraInfo = info->extraInfo->constCast<ExtraCreateRelTableGroupInfo>();
BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo& info) {
auto relGroupName = info.tableName;
auto& extraInfo = info.extraInfo->constCast<ExtraCreateRelTableGroupInfo>();
auto relMultiplicity = extraInfo.relMultiplicity;
std::vector<BoundCreateTableInfo> boundCreateRelTableInfos;
auto relCreateInfo = std::make_unique<CreateTableInfo>(TableType::REL, "", info->onConflict);
relCreateInfo->propertyDefinitions = copyVector(info->propertyDefinitions);
auto relCreateInfo = CreateTableInfo(TableType::REL, "", info.onConflict);
relCreateInfo.propertyDefinitions = copyVector(info.propertyDefinitions);
for (auto& [srcTableName, dstTableName] : extraInfo.srcDstTablePairs) {
relCreateInfo->tableName = getRelGroupTableName(relGroupName, srcTableName, dstTableName);
relCreateInfo->extraInfo =
relCreateInfo.tableName = getRelGroupTableName(relGroupName, srcTableName, dstTableName);
relCreateInfo.extraInfo =
std::make_unique<ExtraCreateRelTableInfo>(relMultiplicity, srcTableName, dstTableName);
boundCreateRelTableInfos.push_back(bindCreateRelTableInfo(relCreateInfo.get()));
boundCreateRelTableInfos.push_back(bindCreateRelTableInfo(relCreateInfo));
}
auto boundExtraInfo =
std::make_unique<BoundExtraCreateRelTableGroupInfo>(std::move(boundCreateRelTableInfos));
return BoundCreateTableInfo(TableType::REL_GROUP, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::REL_GROUP, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCreateTable(const Statement& statement) {
auto createTable = statement.constPtrCast<CreateTable>();
auto tableName = createTable->getInfo()->tableName;
switch (createTable->getInfo()->onConflict) {
auto tableName = createTable->getInfo().tableName;
switch (createTable->getInfo().onConflict) {
case common::ConflictAction::ON_CONFLICT_THROW: {
if (clientContext->getCatalog()->containsTable(clientContext->getTx(), tableName)) {
throw BinderException(tableName + " already exists in catalog.");
Expand Down
42 changes: 42 additions & 0 deletions src/binder/bind/bind_external.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "binder/binder.h"
#include "catalog/catalog_entry/external_node_table_catalog_entry.h"
#include "common/exception/binder.h"
#include "main/client_context.h"
#include "main/database_manager.h"

using namespace kuzu::common;
using namespace kuzu::catalog;

namespace kuzu {
namespace binder {

// Resolves `tableName` inside the attached database `dbName` and returns the matching table
// catalog entry from that database's catalog.
// Throws a BinderException when no database with that name is attached.
catalog::TableCatalogEntry* Binder::bindExternalTableEntry(const std::string& dbName,
    const std::string& tableName) {
    auto database = clientContext->getDatabaseManager()->getAttachedDatabase(dbName);
    if (database == nullptr) {
        throw BinderException{stringFormat("No database named {} has been attached.", dbName)};
    }
    auto externalCatalog = database->getCatalog();
    // Two-step lookup: table name -> table ID -> catalog entry, both under the current
    // transaction of the client context.
    auto externalTableID = externalCatalog->getTableID(clientContext->getTx(), tableName);
    auto externalEntry =
        externalCatalog->getTableCatalogEntry(clientContext->getTx(), externalTableID);
    return externalEntry;
}

// Attaches the external table entry to `nodeOrRel` when the expression is bound to exactly one
// entry and that entry is an external node table. Every other case is left untouched.
void Binder::bindExternalTableEntry(NodeOrRelExpression& nodeOrRel) {
    // Multi-labeled and empty expressions have no single entry to inspect.
    if (nodeOrRel.isMultiLabeled()) {
        return;
    }
    if (nodeOrRel.isEmpty()) {
        return;
    }
    auto singleEntry = nodeOrRel.getSingleEntry();
    if (singleEntry->getType() != CatalogEntryType::EXTERNAL_NODE_TABLE_ENTRY) {
        return;
    }
    // Resolve the (database, table) pair recorded on the external node table entry.
    auto& externalNodeEntry = singleEntry->constCast<ExternalNodeTableCatalogEntry>();
    auto resolvedEntry = bindExternalTableEntry(externalNodeEntry.getExternalDBName(),
        externalNodeEntry.getExternalTableName());
    nodeOrRel.setExternalEntry(resolvedEntry);
}

} // namespace binder
} // namespace kuzu
Loading