-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathT_rex.rb
executable file
·168 lines (151 loc) · 5.58 KB
/
T_rex.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# 20130628 [email protected]
# (f) Free 2013 for post.ch
PATH_SEP = "" # seperate nodes, ie "/" for directory nodes verbatim, "" for per-character nodes (more efficient, less readable)
INFIX_SEP = " / " # seperate prefix (path) from suffix (file/resource) part on output, empty if PATH_SEP not empty
#
# MONKEY PATCH STUFF
# FIXME: need namespace-containment
class Array
def tr_comparator(b) # compares self + b element wise, returns array of arrays: [ matching_elements, rest_elements_of_b ]
# non-lispy style this time...
split_index = ( self.zip(b).take_while { |pair| pair[0]==pair[1] } ).length
if split_index == 0
return [ [], b ]
else
return [b[0..(split_index-1)], b[split_index..-1]]
end
end
end
class String
def tr_tokenize # not exactly, take lines including metacharacters verbatim (ie, no further subdivision)
if /\.[\*\?]/.match self
return [ self ]
else
return self.split(PATH_SEP)
end
end
end
#
#
# T_rex (roar!)
#
#
# this version uses token-arrays instead if simple (one-character) string as node-names/content
# - enabling acceptable tree-compression on add
# - more general tokenize handling (all in String#tr_tokenize)
def debug(stuff)
puts "DEBUG: #{stuff}"
end
class T_rex
attr_reader :node # for <=>
attr_writer :terminal # ugly, probably the default for compacted-tree mode
def initialize(node = Array.new, children = Array.new)
@children = children
@terminal = false # ( not node.nil? ) and children.empty?
@node = (node.kind_of? String) ? node.tr_tokenize : node
return self
end
def member?(path)
return !lookup(path).nil?
end
def lookup(path)
path = path.tr_tokenize if path.kind_of? String
debug "lookup(\"#{path.join}\") : @node=#{@node.inspect}"
(match, rest) = (@node.empty? && !path.empty?) ? [[], path] : @node.tr_comparator(path)
if match==@node
if rest.empty?
return self # found
else
best_matching_child = self.find_best_child(rest)
return best_matching_child.lookup(rest) if not best_matching_child.nil?
end
end
return false
end
def find_best_child(rest)
candidates = []
if not @children.empty?
candidates = @children.collect do |child|
(m, r) = child.node.tr_comparator rest
child_match_length = m.length
[ child, child_match_length ] if child_match_length > 0
end
end
candidates.compact!
if not candidates.empty?
best_matching_child = (candidates.sort {|a,b| a[1]<=>b[1]})[-1][0]
else
best_matching_child = nil
end
return best_matching_child
end
def add_child(path)
# return self if self.member? path
path = path.tr_tokenize if path.kind_of? String
(match, rest) = (@node.empty?) ? [ [], path ] : @node.tr_comparator(path)
case true
when ( match.empty? and rest.empty? )
# terminate recursion
@terminal = true
when ( !rest.empty? and ( @node == match or @node.empty? ))
# add child, check for (partially) matching children, delegate
best_matching_child = find_best_child(rest)
if best_matching_child.nil?
# NEW: no match of rest among children -> new child
@children << (best_matching_child = T_rex.new(rest))
best_matching_child.terminal = true
else
# DELEGATE:
best_matching_child.add_child(rest)
end
when @node.length > match.length
# SPLIT: split current node, one child inherits all current children, the other equals rest (with no children)
# "clone" this node with non-matching path
split_child = T_rex.new(@node[match.length..-1], @children)
split_child.terminal = @terminal
new_child = T_rex.new(rest)
new_child.terminal = true
new_children = [ split_child, new_child ] # slighly confusing... this node has only two children, the first one inherits all our previous children
@node = @node[0..(match.length-1)]
@children = new_children
@terminal = false # can't be terminal immediately after splitting
else debug "T_rex::add_child: return self (fall-through)"
end
return self
end
def to_re
subex = ""
if not @children.empty?
subtree = @children.collect { |child| child.to_re }
if subtree.length==1 and not @terminal
subex = "#{subtree.first}"
else
subex = "(#{subtree.join("|")})#{"?" if @terminal}"
end
end
return "#{@node.join if not @node.nil?}#{subex}"
end
def compact_suffix(path="")
node = "#{@node.join if not @node.nil?}"
path = "#{path}#{node}"
if [email protected]? and [email protected]? # recursion-case
subtree = []
subtree = @children.collect { |child| child.compact_suffix path } # actual recursion
subex = "(#{(subtree.collect.compact {|s| s[1]}).join("|")})#{"?" if @terminal}"
return [[ path, subex ]].concat subtree
else
return [[ path, node ]] # base-case/leaf
end
end
def to_dot(path=nil)
prefix = path.nil? ? [ "digraph {", "edge [arrowtail=\"none\",arrowhead=\"none\"]", "node [shape=\"box\",style=\"rounded\"]" ] : nil
suffix = path.nil? ? "}" : nil
path="#{path}#{@node.join if not @node.nil?}"
subtree_dot = @children.sort.collect { |child| [ "#{self.object_id} -> #{child.object_id}", "#{child.to_dot(path)}" ] } if not @children.empty?
node_dot = "#{self.object_id} [label=\"#{((@node.nil?) ? "" : @node.join)}\",tooltip=\"#{path}\"#{",style=\"rounded,filled\"" if @terminal}]"
return ([ prefix, node_dot, subtree_dot, suffix ].compact.flatten.reject {|r| /^$/.match r}).join(";\n")
end
def <=>(b)
@node <=> b.node
end
end