summaryrefslogtreecommitdiff
path: root/zhsh.lisp
blob: 945785ee7d932eab334cff4f1d4d03b5bd8ec02f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
;;;; zhsh.lisp

(in-package #:zhsh)

(defclass postorder-tree ()
  ((data
    :accessor tree-data
    :initarg :data
    :initform (error "Must supply a tree data array.")
    :documentation
    "An array of shape (N 3) where (aref structure i 0) is a node,
    (aref structure i 1) is the index of the node's parent, and 
    (aref structure i 2) is a list of the node's children.")))

(defun node (tree i)
  (aref (tree-data tree) i 0))

(defun parent (tree i)
  (aref (tree-data tree) i 1))

(defun parent-node (tree i)
  (when (parent-index tree i)
    (node tree (parent-index tree i))))

(defun children (tree root)
  (aref (tree-data tree) root 2))

(defun leafp (tree i)
  (null (aref (tree-data tree) i 2)))

(defun node-count (list)
  "Returns the number of nodes in the tree rooted at LIST, where atoms
   are leaf nodes and sublists are subtrees."
  (if (consp list) 
      (+ 1 (reduce #'+ (mapcar #'node-count list)))
      1))

(defun child-p (tree i j)
  "Is I a direct child of J?"
  (= (parent-index tree i) j))

(defun descendents (tree root)
  (labels ((rec (node)
             (when (children tree node)
               (append (children tree node)
                       (mapcan #'rec (children tree node))))))
    (rec root)))

(defun descendent-leaves (tree root)
  (labels
      ((rec (node)
         (when (children tree node)
           (append (remove-if-not #'local-leaf-p (children tree node))
                   (mapcan #'rec (children tree node)))))
       (local-leaf-p (node)
         (leafp tree node)))
    (rec root)))

(defun ancestors (tree node)
  (when (parent tree node)
    (cons (parent tree node)
          (ancestors tree (parent tree node)))))

(defun tree-root (tree)
  (1- (car (array-dimensions (tree-data tree)))))

(defun tree-size (tree)
  (first (array-dimensions (tree-data tree))))

(defun leftmost-leaf-below (tree i)
  (if (leafp tree i) i
      (let ((ds
              (descendent-leaves tree i)))
        (when ds 
          (apply #'min ds)))))

(defun postorder-sequentialize (list op)
  "LIST is a tree represented as a s-expression. OP is a function of
   two arguments, an integer index and a tree node, and one optional
   argument holding a parent node.

   POSTORDER-SEQUENTIALIZE LIST OP calls OP on each index and node in
   postorder.  Indices start at 0

   Example:

  (postorder-sequentialize '((a b) ((c d) e)) 
                            (lambda (idx node &optional parent)
                              (if parent 
                                (print (list idx '-> node 'under parent)
                                (print (list idx '-> node))))))

  (0 -> A UNDER (A B)) 
  (1 -> B UNDER (A B)) 
  (2 -> (A B) UNDER ((A B) ((C D) E))) 
  (3 -> C UNDER (C D)) 
  (4 -> D UNDER (C D)) 
  (5 -> (C D) UNDER ((C D) E)) 
  (6 -> E UNDER ((C D) E)) 
  (7 -> ((C D) E) UNDER ((A B) ((C D) E))) 
  (8 -> ((A B) ((C D) E))) "
  (if (null list) (funcall op 0 list) 
      (let ((idx -1))
        (labels ((rec (node &optional (parent nil parent-p))
                   (when (consp node)
                     (dolist (sub node)
                       (rec sub node)))
                   (if parent-p 
                       (funcall op (incf idx) node parent)
                       (funcall op (incf idx) node))))
          (rec list)))))


(defun list->postorder-tree (list)
  (let ((node-data
          (make-hash-table :test 'eq))
        (node-count 0))
    ;; build up data needed to intantiate a tree
    (postorder-sequentialize
     list
     (lambda (idx node &optional parent)
       (incf node-count)
       (setf (gethash node node-data) (list idx parent))))
    ;; build the tree data
    (let ((tree-data
            (make-array (list node-count 3)
                        :initial-element nil)))
      (maphash
       (lambda (node info)
         (let* ((idx
                  (first info))
                (parent-node
                  (second info))
                (parent-idx
                  (first (gethash parent-node node-data))))
           (setf (aref tree-data idx 0) node 
                 (aref tree-data idx 1) parent-idx)
           (when parent-node 
             (push idx
                   (aref tree-data parent-idx 2)))))
       node-data)
      (make-instance 'postorder-tree :data tree-data))))


(defun forest (tree i j)
  (when (<= i j) 
    (list tree i j)))

(defun forest-starts-at (f)
  (second f))

(defun forest-stops-at (f)
  (third f))

(defun empty-forest-p (f)
  (null f))

(defun forest-base-tree (f)
  (first f))

(defun subtreep (forest)
  (eql (forest-starts-at forest)
       (leftmost-leaf-below (forest-base-tree forest)
                            (forest-stops-at forest))))

(defun subtree-size (subtree)
  (length
   (descendents (forest-base-tree subtree)
                (forest-stops-at subtree))))

(defun forest-size (forest)
  (1+ (- (forest-stops-at forest)
         (forest-starts-at forest))))

(defun lr-keyroots (tree)
  (let (nodes)
    (dotimes (idx (tree-size tree) (nreverse  nodes))
      (when (or (null (parent tree idx))
                (not (eql (leftmost-leaf-below tree (parent tree idx))
                          (leftmost-leaf-below tree idx))))
        (push idx nodes)))))

(defun cost (op)
  (case (first op)
    (:delete 1)
    (:insert 1)
    (:change
     (destructuring-bind (_change i _in1 t1 _to j _in2 t2) op
       (declare (ignore _change _in1 _in2 _to))
       (cond ((equal (node t1 i) (node t2 j))
              0)
             (t
              2))))))

(defun treedist (t1 t2)
  (cond
    ((and (consp t1) (consp t2))
     (treedist (list->postorder-tree t1)
               (list->postorder-tree t2)))
    ((consp t1)
     (treedist (list->postorder-tree t1)
               t2))
    ((consp t2)
     (treedist t1 (list->postorder-tree t2)))
    (t 
     (let ((treedist
             (make-hash-table :test 'equal)))
       (dolist (k1 (lr-keyroots t1))
         (dolist (k2 (lr-keyroots t2))
           (let ((fdist
                   (make-hash-table :test 'equal))
                 (lk1
                   (leftmost-leaf-below t1 k1))
                 (lk2
                   (leftmost-leaf-below t2 k2)))
             (labels ((fkey (i j)
                        (list (if (null i) nil (forest t1 lk1 i))
                              (if (null j) nil (forest t2 lk2 j))))
                      (forestdist (i j)
                        (gethash (fkey i j) fdist))) 
               (setf (gethash (fkey nil nil) fdist) 0)
               (loop for i from lk1 to k1
                     do (setf (gethash (fkey i nil) fdist)
                              (+ (forestdist (1- i) nil)
                                 (cost (list :delete i :from t1)))))
               (loop for j from lk2 to k2
                     do (setf (gethash (fkey nil j) fdist)
                              (+ (forestdist nil (1- j))
                                 (cost (list :insert j :to t2)))))
               (loop for i from lk1 to k1 do
                 (loop for j from lk2 to k2 do
                   (cond
                     ((and (eql lk1 (leftmost-leaf-below t1 i))
                           (eql lk2 (leftmost-leaf-below t2 j)))
                      (setf
                       (gethash (fkey i j)  fdist)
                       (min
                        (+ (forestdist (1- i) j)
                           (cost (list :delete i :from t1)))
                        
                        (+ (forestdist i (1- j))
                           (cost (list :insert j :to t2)))
                        
                        (+ (forestdist (1- i) (1- j))
                           (cost (list :change i :in t1
                                       :to j :in t2))))
                       ;; then set the treedist for trees rooted at i and j
                       (gethash (list i j) treedist)
                       (forestdist i j)))
                     (t 
                      (setf
                       (gethash (fkey i j) fdist)
                       (min
                        (+ (forestdist (1- i) j)
                           (cost (list :delete i :from t1)))

                        (+ (forestdist i (1- j))
                           (cost (list :insert j :to t2)))
                        (+ (forestdist (1- i) (1- j))
                           (gethash (list i j) treedist))))))))))))
       (gethash (list (tree-root t1) (tree-root t2))
                treedist)))))