Skip to content

Commit

Permalink
Implement callback iterations; move all callback functions to separat…
Browse files Browse the repository at this point in the history
…e module.
  • Loading branch information
scossu committed Mar 6, 2019
1 parent ce0025e commit bdfe4bd
Show file tree
Hide file tree
Showing 10 changed files with 484 additions and 340 deletions.
5 changes: 2 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
[submodule "ext/tpl"]
path = ext/tpl
url = https://github.com/troydhanson/tpl.git
branch = stable
[submodule "ext/spookyhash"]
path = ext/spookyhash
url = https://github.com/centaurean/spookyhash.git
[submodule "ext/collections-c"]
path = ext/collections-c
url = https://github.com/srdja/Collections-C.git
path = ext/collections-c
url = https://github.com/srdja/Collections-C.git
43 changes: 43 additions & 0 deletions lakesuperior/model/graph/callbacks.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from libc.stdint cimport uint32_t, uint64_t

from lakesuperior.model.base cimport Buffer
from lakesuperior.model.graph cimport graph
from lakesuperior.model.graph.triple cimport BufferTriple

# One-shot 64-bit hash entry point declared from the external SpookyHash C
# header: takes a raw input pointer, its size in bytes, and a 64-bit seed.
cdef extern from 'spookyhash_api.h':
uint64_t spookyhash_64(const void *input, size_t input_size, uint64_t seed)

# Declarations of the C-level callbacks implemented in ``callbacks.pyx``.
cdef:
# Graph equality test. Documented to return true when the two graphs are
# equal (the opposite convention from the ``*_cmp_fn`` functions below).
bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2)
# Key comparison callbacks for terms and triples: 0 means "match".
int term_cmp_fn(const void* key1, const void* key2)
int trp_cmp_fn(const void* key1, const void* key2)
# Hash callbacks for serialized terms (``Buffer``) and triples
# (``BufferTriple``).
size_t term_hash_fn(const void* key, int l, uint32_t seed)
size_t trp_hash_fn(const void* key, int l, uint32_t seed)
# Lookup predicates. They all share one signature so they can be used
# interchangeably; each returns true when ``trp`` matches the bound term(s)
# passed as ``t1`` (and ``t2`` for the two-term variants). Terms a variant
# does not need are ignored.
bint lookup_none_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_s_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_p_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_o_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_sp_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_so_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
bint lookup_po_cmp_fn(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2
)
# Per-triple actions applied to a graph: add the triple, or remove it.
# ``ctx`` is an opaque pointer; neither implementation reads it.
void add_trp_callback(
graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
)
void del_trp_callback(
graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
)

249 changes: 249 additions & 0 deletions lakesuperior/model/graph/callbacks.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
import logging

from libc.stdint cimport uint32_t, uint64_t
from libc.string cimport memcmp

from lakesuperior.cy_include cimport collections as cc
from lakesuperior.cy_include cimport spookyhash as sph
from lakesuperior.model.base cimport Buffer, buffer_dump
from lakesuperior.model.graph cimport graph
from lakesuperior.model.graph.triple cimport BufferTriple

logger = logging.getLogger(__name__)


cdef int term_cmp_fn(const void* key1, const void* key2):
    """
    Compare two serialized terms (``Buffer`` structures) byte by byte.

    Both keys are reinterpreted as ``Buffer`` pointers. Buffers of different
    sizes are reported as different without looking at their contents.

    :rtype: int
    :return: 0 if both buffers hold the same bytes; 1 if their sizes differ;
        otherwise the non-zero result of ``memcmp`` on the two buffers.
    """
    cdef:
        Buffer* lhs = <Buffer *>key1
        Buffer* rhs = <Buffer *>key2
        int cmp

    if lhs.sz == rhs.sz:
        # Same length: the outcome is decided by the raw byte comparison.
        cmp = memcmp(lhs.addr, rhs.addr, lhs.sz)
        logger.info(f'term memcmp: {cmp}')
        return cmp

    logger.info(f'Sizes differ: {lhs.sz} != {rhs.sz}. Return 1.')
    return 1


cdef int trp_cmp_fn(const void* key1, const void* key2):
    """
    Compare two triples (``BufferTriple`` structures) by term content.

    The terms are compared with :py:func:`term_cmp_fn` in the order object,
    subject, predicate; the comparison stops at the first term that differs.

    :rtype: int
    :return: 0 if all three terms hold byte-wise identical data in both
        triples; otherwise the non-zero result of the first differing term
        comparison.
    """
    cdef:
        BufferTriple* lhs = <BufferTriple *>key1
        BufferTriple* rhs = <BufferTriple *>key2
        int diff

    diff = term_cmp_fn(lhs.o, rhs.o)
    if diff == 0:
        diff = term_cmp_fn(lhs.s, rhs.s)
    if diff == 0:
        diff = term_cmp_fn(lhs.p, rhs.p)

    logger.info(f'Triples match: {not(diff)}')
    return diff


#cdef int trp_cmp_fn(const void* key1, const void* key2):
# """
# Compare function for two triples in a set.
#
# Here, pointers to terms are compared for s, p, o. The pointers should be
# guaranteed to point to unique values (i.e. no two pointers have the same
# term value within a graph).
#
# :rtype: int
# :return: 0 if the addresses of all terms are the same, 1 otherwise.
# """
# t1 = <BufferTriple *>key1
# t2 = <BufferTriple *>key2
#
# cdef int is_not_equal = (
# t1.s.addr != t2.s.addr or
# t1.p.addr != t2.p.addr or
# t1.o.addr != t2.o.addr
# )
#
# logger.info(f'Triples match: {not(is_not_equal)}')
# return is_not_equal


cdef bint graph_eq_fn(graph.SimpleGraph g1, graph.SimpleGraph g2):
    """
    Compare 2 graphs for equality.

    Two graphs are equal when each one contains every triple of the other.
    Membership is tested with ``hashset_contains`` on the graphs' triple
    sets.

    Note that this returns the opposite value than the triple and term
    compare functions: 1 (True) if equal, 0 (False) if not.

    :param g1: First graph.
    :param g2: Second graph.
    :rtype: bint
    :return: True if both graphs hold the same set of triples.
    """
    cdef:
        void* el
        cc.HashSetIter it

    # Every triple of g1 must be present in g2...
    cc.hashset_iter_init(&it, g1._triples)
    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
        if not cc.hashset_contains(g2._triples, el):
            return False

    # ...and every triple of g2 must be present in g1; otherwise a graph
    # that is a strict subset of the other would be reported as equal.
    cc.hashset_iter_init(&it, g2._triples)
    while cc.hashset_iter_next(&it, &el) != cc.CC_ITER_END:
        if not cc.hashset_contains(g1._triples, el):
            return False

    return True


cdef size_t term_hash_fn(const void* key, int l, uint32_t seed):
    """
    Hash callback for serialized terms (:py:class:`Buffer` objects).

    The hash is computed with ``spookyhash_64`` over the ``sz`` bytes at the
    buffer's ``addr``. The ``l`` argument is not used.

    :rtype: size_t
    :return: The 64-bit hash of the buffer contents, cast to ``size_t``.
    """
    cdef Buffer* term = <Buffer*>key

    return <size_t>spookyhash_64(term.addr, term.sz, seed)


cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
    """
    Hash callback for sets of (serialized) triples.

    The bytes of the subject, predicate and object terms are fed, in that
    order, to a streaming SpookyHash context, so that triples with identical
    term contents hash to the same value. This keeps the hash consistent
    with :py:func:`trp_cmp_fn`, which also compares term contents.

    The ``l`` argument is not used.

    :param key: Pointer to a ``BufferTriple``.
    :param seed: Hash seed; widened to 64 bits and used for both seeds of
        the hashing context.
    :rtype: size_t
    :return: The first of the two 64-bit hash outputs, cast to ``size_t``.
    """
    cdef:
        BufferTriple* trp = <BufferTriple*>key
        uint64_t seed64 = <uint64_t>seed
        # Second seed on input; receives the second (discarded) hash value.
        uint64_t seed_dummy = seed64
        sph.spookyhash_context ctx

    sph.spookyhash_context_init(&ctx, seed64, seed_dummy)
    # Each term must be read from its own buffer: pairing one term's address
    # with another term's size hashes the wrong data and may read past the
    # end of the buffer.
    sph.spookyhash_update(&ctx, trp.s.addr, trp.s.sz)
    sph.spookyhash_update(&ctx, trp.p.addr, trp.p.sz)
    sph.spookyhash_update(&ctx, trp.o.addr, trp.o.sz)
    sph.spookyhash_final(&ctx, &seed64, &seed_dummy)

    return <size_t>seed64


#cdef size_t trp_hash_fn(const void* key, int l, uint32_t seed):
# """
# Hash function for sets of (serialized) triples.
#
# This function computes the hash of the concatenated pointer values in the
# s, p, o members of the triple. The triple structure is treated as a byte
# string. This is safe in spite of byte-wise struct evaluation being a
# frowned-upon practice (due to padding issues), because it is assumed that
# the input value is always the same type of structure.
# """
# return <size_t>spookyhash_64(key, l, seed)


#cdef size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed):
# """
# No-op function that takes a pointer and does *not* hash it.
#
# The pointer value is used as the "hash".
# """
# return <size_t>key


cdef inline bint lookup_none_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate for queries with no bound terms.

    None of the arguments is inspected; they exist only so that this
    function has the same signature as the other lookup predicates.

    :rtype: bint
    :return: Always ``True``.
    """
    # Nothing is bound, so every triple matches.
    return True


cdef inline bint lookup_s_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound subject.

    ``t2`` is not used; it is declared only so that the signature matches
    the other interchangeable lookup predicates.

    :rtype: bint
    :return: ``True`` if ``t1`` compares equal to the subject of ``trp``.
    """
    return term_cmp_fn(t1, trp.s) == 0


cdef inline bint lookup_p_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound predicate.

    ``t2`` is not used.

    :rtype: bint
    :return: ``True`` if ``t1`` compares equal to the predicate of ``trp``.
    """
    return term_cmp_fn(t1, trp.p) == 0


cdef inline bint lookup_o_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound object.

    ``t2`` is not used.

    :rtype: bint
    :return: ``True`` if ``t1`` compares equal to the object of ``trp``.
    """
    return term_cmp_fn(t1, trp.o) == 0


cdef inline bint lookup_sp_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound subject and predicate.

    The predicate is only compared if the subject already matches.

    :rtype: bint
    :return: ``True`` if ``t1`` equals the subject and ``t2`` equals the
        predicate of ``trp``.
    """
    if term_cmp_fn(t1, trp.s) != 0:
        return False

    return term_cmp_fn(t2, trp.p) == 0


cdef inline bint lookup_so_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound subject and object.

    The object is only compared if the subject already matches.

    :rtype: bint
    :return: ``True`` if ``t1`` equals the subject and ``t2`` equals the
        object of ``trp``.
    """
    if term_cmp_fn(t1, trp.s) != 0:
        return False

    return term_cmp_fn(t2, trp.o) == 0


cdef inline bint lookup_po_cmp_fn(
    const BufferTriple *trp, const Buffer *t1, const Buffer *t2
):
    """
    Lookup predicate matching a triple against a bound predicate and object.

    The object is only compared if the predicate already matches.

    :rtype: bint
    :return: ``True`` if ``t1`` equals the predicate and ``t2`` equals the
        object of ``trp``.
    """
    if term_cmp_fn(t1, trp.p) != 0:
        return False

    return term_cmp_fn(t2, trp.o) == 0


## LOOKUP CALLBACK FUNCTIONS

cdef inline void add_trp_callback(
    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
):
    """
    Lookup callback that adds the given triple to a graph.

    :param gr: Graph that receives the triple.
    :param trp: Triple to add.
    :param ctx: Not used; present to match the callback signature.
    """
    # The second argument is the ``add`` flag of ``add_triple``.
    gr.add_triple(trp, True)


cdef inline void del_trp_callback(
    graph.SimpleGraph gr, const BufferTriple* trp, void* ctx
):
    """
    Lookup callback that removes the given triple from a graph.

    A dump of the three terms is logged at INFO level before the removal.

    :param gr: Graph the triple is removed from.
    :param trp: Triple to remove.
    :param ctx: Not used; present to match the callback signature.
    """
    msg = 'removing triple: {} {} {}'.format(
        buffer_dump(trp.s), buffer_dump(trp.p), buffer_dump(trp.o)
    )
    logger.info(msg)

    gr.remove_triple(trp)


27 changes: 14 additions & 13 deletions lakesuperior/model/graph/graph.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,20 @@ from cymem.cymem cimport Pool
from lakesuperior.cy_include cimport collections as cc
from lakesuperior.model.base cimport Buffer
from lakesuperior.model.graph.triple cimport BufferTriple
from lakesuperior.model.structures.keyset cimport Keyset

# Lookup function that returns whether a triple contains a match pattern.
# Return True if the triple exists, False otherwise.
ctypedef bint (*lookup_fn_t)(
const BufferTriple *trp, const Buffer *t1, const Buffer *t2)

# Callback for an iterator.
ctypedef void (*lookup_callback_fn_t)(
SimpleGraph gr, const BufferTriple* trp, void* ctx
)

ctypedef Buffer SPOBuffer[3]
ctypedef Buffer *BufferPtr

cdef:
int term_cmp_fn(const void* key1, const void* key2)
int trp_cmp_fn(const void* key1, const void* key2)
bint graph_eq_fn(SimpleGraph g1, SimpleGraph g2)
size_t trp_hash_fn(const void* key, int l, uint32_t seed)
size_t hash_ptr_passthrough(const void* key, int l, uint32_t seed)

cdef class SimpleGraph:
cdef:
cc.HashSet *_terms # Set of unique serialized terms.
Expand All @@ -32,28 +30,31 @@ cdef class SimpleGraph:
cc.key_compare_ft trp_cmp_fn

inline BufferTriple* store_triple(self, const BufferTriple* strp)
inline void add_triple(self, BufferTriple *trp, bint add=*) except *
int remove_triple(self, BufferTriple* trp_buf) except -1
bint trp_contains(self, BufferTriple* btrp)
inline void add_triple(self, const BufferTriple *trp, bint add=*) except *
int remove_triple(self, const BufferTriple* trp_buf) except -1
bint trp_contains(self, const BufferTriple* btrp)

# Basic graph operations.
void ip_union(self, SimpleGraph other) except *
void ip_subtraction(self, SimpleGraph other) except *
void ip_intersection(self, SimpleGraph other) except *
void ip_xor(self, SimpleGraph other) except *
SimpleGraph empty_copy(self)
void _match_ptn_callback(
self, pattern, SimpleGraph gr,
lookup_callback_fn_t callback_fn, void* ctx=*
) except *

cpdef union_(self, SimpleGraph other)
cpdef subtraction(self, SimpleGraph other)
cpdef intersection(self, SimpleGraph other)
cpdef xor(self, SimpleGraph other)
cpdef void set(self, tuple trp) except *
cpdef void remove_triples(self, pattern) except *


cdef class Imr(SimpleGraph):
cdef:
readonly str uri
readonly str id
Imr empty_copy(self)

cpdef as_rdflib(self)
Loading

0 comments on commit bdfe4bd

Please sign in to comment.