Skip to content

Commit

Permalink
Add external node table
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Aug 19, 2024
1 parent 86792c1 commit e2e77a5
Show file tree
Hide file tree
Showing 65 changed files with 4,750 additions and 3,937 deletions.
60 changes: 60 additions & 0 deletions extension/duckdb/test/test_files/external_table.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
-DATASET CSV empty

--

-CASE ExternalDuckDBTable

-STATEMENT load extension "${KUZU_ROOT_DIRECTORY}/extension/duckdb/build/libduckdb.kuzu_extension"
---- ok
-STATEMENT ATTACH '${KUZU_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as tinysnb (dbtype duckdb, skip_unsupported_table = true);
---- 1
Attached database successfully.
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT CREATE EXTERNAL NODE TABLE duck_person AS tinysnb.person (PRIMARY KEY (ID));
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 1
duck_person|EXTERNAL_NODE|local(kuzu)|
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT ALTER TABLE duck_person RENAME TO d_person;
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 1
d_person|EXTERNAL_NODE|local(kuzu)|
-STATEMENT BEGIN TRANSACTION;
---- ok
-STATEMENT DROP TABLE d_person;
---- ok
-STATEMENT COMMIT;
---- ok
-RELOADDB
-STATEMENT CALL SHOW_TABLES() RETURN *;
---- 0
-STATEMENT load extension "${KUZU_ROOT_DIRECTORY}/extension/duckdb/build/libduckdb.kuzu_extension"
---- ok
-STATEMENT ATTACH '${KUZU_ROOT_DIRECTORY}/dataset/databases/duckdb_database/tinysnb.db' as tinysnb (dbtype duckdb, skip_unsupported_table = true);
---- 1
Attached database successfully.
-STATEMENT CREATE EXTERNAL NODE TABLE duck_person AS tinysnb.person (PRIMARY KEY (ID));
---- 1
Table duck_person has been created.
-STATEMENT COPY duck_person FROM (LOAD FROM tinysnb.person RETURN ID);
---- ok
-STATEMENT MATCH (a:duck_person) RETURN a.ID, a.fName;
---- 8
0|Alice
2|Bob
3|Carol
5|Dan
7|Elizabeth
8|Farooq
9|Greg
10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff
19 changes: 14 additions & 5 deletions scripts/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ EXPORT : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'P' | 'p' ) ( 'O' | 'o' ) ( 'R' | 'r' ) (

EXTENSION : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'T' | 't' ) ( 'E' | 'e' ) ( 'N' | 'n' ) ( 'S' | 's' ) ( 'I' | 'i' ) ( 'O' | 'o' ) ( 'N' | 'n' ) ;

EXTERNAL : ( 'E' | 'e' ) ( 'X' | 'x' ) ( 'T' | 't' ) ( 'E' | 'e' ) ( 'R' | 'r' ) ( 'N' | 'n' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

FALSE : ( 'F' | 'f' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ( 'S' | 's' ) ( 'E' | 'e' ) ;

FROM : ( 'F' | 'f' ) ( 'R' | 'r' ) ( 'O' | 'o' ) ( 'M' | 'm' ) ;
Expand Down Expand Up @@ -225,6 +227,7 @@ oC_Cypher
oC_Statement
: oC_Query
| kU_CreateNodeTable
| kU_CreateExternalNodeTable
| kU_CreateRelTable
| kU_CreateRelTableGroup
| kU_CreateRdfGraph
Expand Down Expand Up @@ -312,10 +315,13 @@ kU_IfNotExists
: IF SP NOT SP EXISTS ;

kU_CreateNodeTable
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint ) SP? ')' ;
: CREATE SP NODE SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_PrimaryKey ) SP? ')' ;

kU_CreateExternalNodeTable
: CREATE SP EXTERNAL SP NODE SP TABLE SP oC_SchemaName SP AS SP oC_SchemaName kU_TableLookup SP? '(' SP? kU_PrimaryKey SP? ')' ;

kU_CreateRelTable
: CREATE SP REL SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
: CREATE SP REL SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

kU_CreateRelTableGroup
: CREATE SP REL SP TABLE SP GROUP SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection ( SP? ',' SP? kU_RelTableConnection )+ SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
Expand All @@ -324,10 +330,10 @@ kU_RelTableConnection
: FROM SP oC_SchemaName SP TO SP oC_SchemaName ;

kU_CreateRdfGraph
: CREATE SP RDFGRAPH SP (kU_IfNotExists SP)? oC_SchemaName ;
: CREATE SP RDFGRAPH SP ( kU_IfNotExists SP )? oC_SchemaName ;

kU_CreateSequence
: CREATE SP SEQUENCE SP (kU_IfNotExists SP)? oC_SchemaName (SP kU_SequenceOptions)* ;
: CREATE SP SEQUENCE SP ( kU_IfNotExists SP )? oC_SchemaName ( SP kU_SequenceOptions )* ;

kU_CreateType
: CREATE SP TYPE SP oC_SchemaName SP AS SP kU_DataType SP? ;
Expand Down Expand Up @@ -387,7 +393,7 @@ kU_PropertyDefinitions : kU_PropertyDefinition ( SP? ',' SP? kU_PropertyDefiniti

kU_PropertyDefinition : kU_ColumnDefinition ( SP kU_Default )? ;

kU_CreateNodeConstraint : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;
kU_PrimaryKey : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;

DECIMAL: ( 'D' | 'd' ) ( 'E' | 'e' ) ( 'C' | 'c' ) ( 'I' | 'i' ) ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

Expand Down Expand Up @@ -788,6 +794,9 @@ kU_CountSubquery
oC_PropertyLookup
: '.' SP? ( oC_PropertyKeyName | STAR ) ;

kU_TableLookup
: '.' SP? oC_SchemaName ;

oC_CaseExpression
: ( ( CASE ( SP? oC_CaseAlternative )+ ) | ( CASE SP? oC_Expression ( SP? oC_CaseAlternative )+ ) ) ( SP? ELSE SP? oC_Expression )? SP? END ;

Expand Down
2 changes: 1 addition & 1 deletion scripts/antlr4/hash.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
bc82630bdea96b23de7acaa41822b5da
4e2233175e5b207817cf6f09c1f9c14c
17 changes: 12 additions & 5 deletions src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ oC_Cypher
oC_Statement
: oC_Query
| kU_CreateNodeTable
| kU_CreateExternalNodeTable
| kU_CreateRelTable
| kU_CreateRelTableGroup
| kU_CreateRdfGraph
Expand Down Expand Up @@ -95,10 +96,13 @@ kU_IfNotExists
: IF SP NOT SP EXISTS ;

kU_CreateNodeTable
: CREATE SP NODE SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_CreateNodeConstraint ) SP? ')' ;
: CREATE SP NODE SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_PropertyDefinitions SP? ( ',' SP? kU_PrimaryKey ) SP? ')' ;

kU_CreateExternalNodeTable
: CREATE SP EXTERNAL SP NODE SP TABLE SP oC_SchemaName SP AS SP oC_SchemaName kU_TableLookup SP? '(' SP? kU_PrimaryKey SP? ')' ;

kU_CreateRelTable
: CREATE SP REL SP TABLE SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
: CREATE SP REL SP TABLE SP ( kU_IfNotExists SP )? oC_SchemaName SP? '(' SP? kU_RelTableConnection SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;

kU_CreateRelTableGroup
: CREATE SP REL SP TABLE SP GROUP SP (kU_IfNotExists SP)? oC_SchemaName SP? '(' SP? kU_RelTableConnection ( SP? ',' SP? kU_RelTableConnection )+ SP? ( ',' SP? kU_PropertyDefinitions SP? )? ( ',' SP? oC_SymbolicName SP? )? ')' ;
Expand All @@ -107,10 +111,10 @@ kU_RelTableConnection
: FROM SP oC_SchemaName SP TO SP oC_SchemaName ;

kU_CreateRdfGraph
: CREATE SP RDFGRAPH SP (kU_IfNotExists SP)? oC_SchemaName ;
: CREATE SP RDFGRAPH SP ( kU_IfNotExists SP )? oC_SchemaName ;

kU_CreateSequence
: CREATE SP SEQUENCE SP (kU_IfNotExists SP)? oC_SchemaName (SP kU_SequenceOptions)* ;
: CREATE SP SEQUENCE SP ( kU_IfNotExists SP )? oC_SchemaName ( SP kU_SequenceOptions )* ;

kU_CreateType
: CREATE SP TYPE SP oC_SchemaName SP AS SP kU_DataType SP? ;
Expand Down Expand Up @@ -170,7 +174,7 @@ kU_PropertyDefinitions : kU_PropertyDefinition ( SP? ',' SP? kU_PropertyDefiniti

kU_PropertyDefinition : kU_ColumnDefinition ( SP kU_Default )? ;

kU_CreateNodeConstraint : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;
kU_PrimaryKey : PRIMARY SP KEY SP? '(' SP? oC_PropertyKeyName SP? ')' ;

DECIMAL: ( 'D' | 'd' ) ( 'E' | 'e' ) ( 'C' | 'c' ) ( 'I' | 'i' ) ( 'M' | 'm' ) ( 'A' | 'a' ) ( 'L' | 'l' ) ;

Expand Down Expand Up @@ -571,6 +575,9 @@ kU_CountSubquery
oC_PropertyLookup
: '.' SP? ( oC_PropertyKeyName | STAR ) ;

kU_TableLookup
: '.' SP? oC_SchemaName ;

oC_CaseExpression
: ( ( CASE ( SP? oC_CaseAlternative )+ ) | ( CASE SP? oC_Expression ( SP? oC_CaseAlternative )+ ) ) ( SP? ELSE SP? oC_Expression )? SP? END ;

Expand Down
1 change: 1 addition & 0 deletions src/antlr4/keywords.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ EXISTS
EXPLAIN
EXPORT
EXTENSION
EXTERNAL
FALSE
FROM
GLOB
Expand Down
1 change: 1 addition & 0 deletions src/binder/bind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(
bind_transaction.cpp
bind_updating_clause.cpp
bind_extension.cpp
bind_external.cpp
bind_export_database.cpp
bind_import_database.cpp
bind_use_database.cpp)
Expand Down
78 changes: 57 additions & 21 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ static void validatePrimaryKey(const std::string& pkColName,
}
}

BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo* info) {
switch (info->tableType) {
BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo& info) {
switch (info.tableType) {
case TableType::NODE: {
return bindCreateNodeTableInfo(info);
}
case TableType::EXTERNAL_NODE: {
return bindCreateExternalNodeTableInfo(info);
}
case TableType::REL: {
return bindCreateRelTableInfo(info);
}
Expand All @@ -128,24 +131,57 @@ BoundCreateTableInfo Binder::bindCreateTableInfo(const parser::CreateTableInfo*
}
}

BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo* info) {
auto propertyDefinitions = bindPropertyDefinitions(info->propertyDefinitions, info->tableName);
auto& extraInfo = info->extraInfo->constCast<ExtraCreateNodeTableInfo>();
BoundCreateTableInfo Binder::bindCreateNodeTableInfo(const CreateTableInfo& info) {
auto propertyDefinitions = bindPropertyDefinitions(info.propertyDefinitions, info.tableName);
auto& extraInfo = info.extraInfo->constCast<ExtraCreateNodeTableInfo>();
validatePrimaryKey(extraInfo.pKName, propertyDefinitions);
auto boundExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(extraInfo.pKName,
std::move(propertyDefinitions));
return BoundCreateTableInfo(TableType::NODE, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::NODE, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info) {
// Looks up the property definition called `name` in `definitions` and returns a copy of it.
// Throws a BinderException if no definition carries that name.
static PropertyDefinition getDefinition(const std::vector<PropertyDefinition>& definitions,
    const std::string& name) {
    for (auto it = definitions.begin(); it != definitions.end(); ++it) {
        if (it->getName() == name) {
            // Hand back an independent copy; the caller owns the result.
            return it->copy();
        }
    }
    // LCOV_EXCL_START
    throw BinderException(stringFormat("Cannot find property with name {}.", name));
    // LCOV_EXCL_STOP
}

// Derives the name of the physical node table that backs an external node table by
// prefixing the user-visible table name with an underscore.
// The argument is only read, so it is taken by const reference to avoid copying the
// string on every call.
static std::string getPhysicalTableName(const std::string& name) {
    return "_" + name;
}

// Binds a CREATE EXTERNAL NODE TABLE statement.
//
// The table referenced by (extraInfo.dbName, extraInfo.tableName) is resolved through
// bindExternalTableEntry, and its property definitions are used for the new table.
// Two create-infos are produced:
//   1. a regular NODE table (named via getPhysicalTableName) that holds only the primary
//      key column and always uses ON_CONFLICT_THROW;
//   2. the EXTERNAL_NODE table info returned to the caller, which carries the primary key
//      name, the external database/table names, the physical create-info from (1) and a
//      copy of all property definitions of the external table.
//
// Throws a BinderException if the external table cannot be resolved by
// bindExternalTableEntry or if the primary key column is not one of its properties.
BoundCreateTableInfo Binder::bindCreateExternalNodeTableInfo(const CreateTableInfo& info) {
    auto& extraInfo = info.extraInfo->constCast<ExtraCreateExternalNodeTableInfo>();
    auto entry = bindExternalTableEntry(extraInfo.dbName, extraInfo.tableName);
    auto& propertyDefinitions = entry->getProperties();
    // Bind physical create node table info
    std::vector<PropertyDefinition> physicalPropertyDefinitions;
    // getDefinition already returns an owned copy, so store it directly instead of
    // copying it a second time.
    physicalPropertyDefinitions.push_back(getDefinition(propertyDefinitions, extraInfo.pkName));
    auto boundPhysicalExtraInfo = std::make_unique<BoundExtraCreateNodeTableInfo>(
        extraInfo.pkName, std::move(physicalPropertyDefinitions));
    auto boundPhysicalCreateInfo = BoundCreateTableInfo(TableType::NODE,
        getPhysicalTableName(info.tableName), ConflictAction::ON_CONFLICT_THROW,
        std::move(boundPhysicalExtraInfo));
    // Bind create node table reference info
    auto boundExtraInfo = std::make_unique<BoundExtraCreateExternalNodeTableInfo>(
        extraInfo.pkName, extraInfo.dbName, extraInfo.tableName,
        std::move(boundPhysicalCreateInfo), copyVector(propertyDefinitions));
    return BoundCreateTableInfo(TableType::EXTERNAL_NODE, info.tableName, info.onConflict,
        std::move(boundExtraInfo));
}

BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo& info) {
std::vector<PropertyDefinition> propertyDefinitions;
propertyDefinitions.emplace_back(
ColumnDefinition(InternalKeyword::ID, LogicalType::INTERNAL_ID()));
for (auto& definition : bindPropertyDefinitions(info->propertyDefinitions, info->tableName)) {
for (auto& definition : bindPropertyDefinitions(info.propertyDefinitions, info.tableName)) {
propertyDefinitions.push_back(definition.copy());
}
auto& extraInfo = info->extraInfo->constCast<ExtraCreateRelTableInfo>();
auto& extraInfo = info.extraInfo->constCast<ExtraCreateRelTableInfo>();
auto srcMultiplicity = RelMultiplicityUtils::getFwd(extraInfo.relMultiplicity);
auto dstMultiplicity = RelMultiplicityUtils::getBwd(extraInfo.relMultiplicity);
auto srcTableID = bindTableID(extraInfo.srcTableName);
Expand All @@ -154,7 +190,7 @@ BoundCreateTableInfo Binder::bindCreateRelTableInfo(const CreateTableInfo* info)
validateTableType(dstTableID, TableType::NODE);
auto boundExtraInfo = std::make_unique<BoundExtraCreateRelTableInfo>(srcMultiplicity,
dstMultiplicity, srcTableID, dstTableID, std::move(propertyDefinitions));
return BoundCreateTableInfo(TableType::REL, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::REL, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

Expand All @@ -163,29 +199,29 @@ static std::string getRelGroupTableName(const std::string& relGroupName,
return relGroupName + "_" + srcTableName + "_" + dstTableName;
}

BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo* info) {
auto relGroupName = info->tableName;
auto& extraInfo = info->extraInfo->constCast<ExtraCreateRelTableGroupInfo>();
BoundCreateTableInfo Binder::bindCreateRelTableGroupInfo(const CreateTableInfo& info) {
auto relGroupName = info.tableName;
auto& extraInfo = info.extraInfo->constCast<ExtraCreateRelTableGroupInfo>();
auto relMultiplicity = extraInfo.relMultiplicity;
std::vector<BoundCreateTableInfo> boundCreateRelTableInfos;
auto relCreateInfo = std::make_unique<CreateTableInfo>(TableType::REL, "", info->onConflict);
relCreateInfo->propertyDefinitions = copyVector(info->propertyDefinitions);
auto relCreateInfo = CreateTableInfo(TableType::REL, "", info.onConflict);
relCreateInfo.propertyDefinitions = copyVector(info.propertyDefinitions);
for (auto& [srcTableName, dstTableName] : extraInfo.srcDstTablePairs) {
relCreateInfo->tableName = getRelGroupTableName(relGroupName, srcTableName, dstTableName);
relCreateInfo->extraInfo =
relCreateInfo.tableName = getRelGroupTableName(relGroupName, srcTableName, dstTableName);
relCreateInfo.extraInfo =
std::make_unique<ExtraCreateRelTableInfo>(relMultiplicity, srcTableName, dstTableName);
boundCreateRelTableInfos.push_back(bindCreateRelTableInfo(relCreateInfo.get()));
boundCreateRelTableInfos.push_back(bindCreateRelTableInfo(relCreateInfo));
}
auto boundExtraInfo =
std::make_unique<BoundExtraCreateRelTableGroupInfo>(std::move(boundCreateRelTableInfos));
return BoundCreateTableInfo(TableType::REL_GROUP, info->tableName, info->onConflict,
return BoundCreateTableInfo(TableType::REL_GROUP, info.tableName, info.onConflict,
std::move(boundExtraInfo));
}

std::unique_ptr<BoundStatement> Binder::bindCreateTable(const Statement& statement) {
auto createTable = statement.constPtrCast<CreateTable>();
auto tableName = createTable->getInfo()->tableName;
switch (createTable->getInfo()->onConflict) {
auto tableName = createTable->getInfo().tableName;
switch (createTable->getInfo().onConflict) {
case common::ConflictAction::ON_CONFLICT_THROW: {
if (clientContext->getCatalog()->containsTable(clientContext->getTx(), tableName)) {
throw BinderException(tableName + " already exists in catalog.");
Expand Down
41 changes: 41 additions & 0 deletions src/binder/bind/bind_external.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include "binder/binder.h"
#include "main/database_manager.h"
#include "common/exception/binder.h"
#include "main/client_context.h"
#include "catalog/catalog_entry/external_node_table_catalog_entry.h"

using namespace kuzu::common;
using namespace kuzu::catalog;

namespace kuzu {
namespace binder {

// Resolves `tableName` inside the attached database `dbName` and returns the matching
// table catalog entry from that database's catalog.
// Throws a BinderException when no database with the given name has been attached.
catalog::TableCatalogEntry* Binder::bindExternalTableEntry(const std::string& dbName,
    const std::string& tableName) {
    auto database = clientContext->getDatabaseManager()->getAttachedDatabase(dbName);
    if (nullptr == database) {
        throw BinderException{stringFormat("No database named {} has been attached.", dbName)};
    }
    // Look the table up in the attached database's own catalog.
    auto externalCatalog = database->getCatalog();
    auto externalTableID = externalCatalog->getTableID(clientContext->getTx(), tableName);
    return externalCatalog->getTableCatalogEntry(clientContext->getTx(), externalTableID);
}

// If `nodeOrRel` is bound to exactly one table entry and that entry is an external node
// table, resolves the referenced external table and records it on the expression via
// setExternalEntry. Multi-labeled or empty expressions, and entries of any other type,
// are left untouched.
void Binder::bindExternalTableEntry(NodeOrRelExpression& nodeOrRel) {
    if (nodeOrRel.isMultiLabeled() || nodeOrRel.isEmpty()) {
        return;
    }
    auto entry = nodeOrRel.getSingleEntry();
    if (entry->getType() != CatalogEntryType::EXTERNAL_NODE_TABLE_ENTRY) {
        return;
    }
    auto& externalNodeEntry = entry->constCast<ExternalNodeTableCatalogEntry>();
    auto resolvedEntry = bindExternalTableEntry(externalNodeEntry.getExternalDBName(),
        externalNodeEntry.getExternalTableName());
    nodeOrRel.setExternalEntry(resolvedEntry);
}

}
}
Loading

0 comments on commit e2e77a5

Please sign in to comment.