From f253a7968df6fdcf5fae25076e72920b57a5e8cf Mon Sep 17 00:00:00 2001 From: Colin Okay Date: Thu, 27 May 2021 13:08:11 -0500 Subject: implementation of treediff algorithm --- zhsh.lisp | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 13 deletions(-) diff --git a/zhsh.lisp b/zhsh.lisp index 62881f1..d68f349 100644 --- a/zhsh.lisp +++ b/zhsh.lisp @@ -68,23 +68,35 @@ (leafp tree node))) (rec root))) -(defun root-node (tree) +(defun ancestors (tree node) + (when (parent tree node) + (cons (parent tree node) + (ancestors tree (parent tree node))))) + +(defun tree-root (tree) (1- (car (array-dimensions (tree-data tree))))) -(defun depth (tree) - (labels ((rec (node) - (1+ - (cond - ((null (children tree node)) - 0) - ((null (rest (children tree node))) - (rec (first (children tree node)))) - (t - (apply #'max (mapcar #'rec (children tree node)))))))) - (rec (root-node tree)))) +;; (defun depth (tree) +;; (labels ((rec (node) +;; (1+ +;; (cond +;; ((null (children tree node)) +;; 0) +;; ((null (rest (children tree node))) +;; (rec (first (children tree node)))) +;; (t +;; (apply #'max (mapcar #'rec (children tree node)))))))) +;; (rec (root-node tree)))) + +(defun tree-size (tree) + (first (array-dimensions (tree-data tree)))) (defun leftmost-leaf-below (tree i) - (apply #'min (descendent-leaves tree i))) + (if (leafp tree i) i + (let ((ds + (descendent-leaves tree i))) + (when ds + (apply #'min ds))))) (defun postorder-sequentialize (list op) "LIST is a tree represented as a s-expression. OP is a function of @@ -153,3 +165,121 @@ node-data) (make-instance 'postorder-tree :data tree-data)))) + +(defun forest (tree i j) + (when (<= i j) + (list tree i j))) + +(defun forest-starts-at (f) + (second f)) + +(defun forest-stops-at (f) + (third f)) + +(defun empty-forest-p (f) + (null f)) + +(defun forest-base-tree (f) + (first f)) + +(defun subtreep (forest) + (eql (forest-starts-at forest) + (leftmost-leaf-below (forest-base-tree forest) + (forest-stops-at forest)))) + +(defun subtree-size (subtree) + (length + (descendents (forest-base-tree subtree) + (forest-stops-at subtree)))) + +(defun forest-size (forest) + (1+ (- (forest-stops-at forest) + (forest-starts-at forest)))) + +(defun lr-keyroots (tree) + (let (nodes) + (dotimes (idx (tree-size tree) (nreverse nodes)) + (when (or (null (parent tree idx)) + (not (eql (leftmost-leaf-below tree (parent tree idx)) + (leftmost-leaf-below tree idx)))) + (push idx nodes))))) + +(defun cost (op) + (case (first op) + (:rem 1) + (:add 1) + (:change + (destructuring-bind (_change i _in1 t1 _to j _in2 t2) op + (declare (ignore _change _in1 _in2 _to)) + (if (equal (node t1 i) (node t2 j)) + 0 + 1))))) + +(defun treedist (t1 t2) + (cond + ((and (consp t1) (consp t2)) + (treedist (list->postorder-tree t1) + (list->postorder-tree t2))) + ((consp t1) + (treedist (list->postorder-tree t1) + t2)) + ((consp t2) + (treedist t1 (list->postorder-tree t2))) + (t + (let ((treedist + (make-hash-table :test 'equal))) + (dolist (k1 (lr-keyroots t1)) + (dolist (k2 (lr-keyroots t2)) + (let ((fdist + (make-hash-table :test 'equal)) + (lk1 + (leftmost-leaf-below t1 k1)) + (lk2 + (leftmost-leaf-below t2 k2))) + (labels ((fkey (i j) + (list (if (null i) nil (forest t1 lk1 i)) + (if (null j) nil (forest t2 lk2 j)))) + (forestdist (i j) + (gethash (fkey i j) fdist))) + (setf (gethash (fkey nil nil) fdist) 0) + (loop for i from lk1 to k1 + do (setf (gethash (fkey i nil) fdist) + (+ (forestdist (1- i) nil) + (cost (list :rem i :from t1))))) + (loop for j from lk2 to k2 + do (setf (gethash (fkey nil j) fdist) + (+ (forestdist nil (1- j)) + (cost (list :add j :to t2))))) + (loop for i from lk1 to k1 do + (loop for j from lk2 to k2 do + (cond + ((and (eql lk1 (leftmost-leaf-below t1 i)) + (eql lk2 (leftmost-leaf-below t2 j))) + (setf + (gethash (fkey i j) fdist) + (min + (+ (forestdist (1- i) j) + (cost (list :rem i :from t1))) + + (+ (forestdist i (1- j)) + (cost (list :add j :to t2))) + + (+ (forestdist (1- i) (1- j)) + (cost (list :change i :in t1 + :to j :in t2)))) + ;; then set the treedist for trees rooted at i and j + (gethash (list i j) treedist) + (forestdist i j))) + (t + (setf + (gethash (fkey i j) fdist) + (min + (+ (forestdist (1- i) j) + (cost (list :rem i :from t1))) + + (+ (forestdist i (1- j)) + (cost (list :add j :to t2))) + (+ (forestdist (1- i) (1- j)) + (gethash (list i j) treedist)))))))))))) + (gethash (list (tree-root t1) (tree-root t2)) + treedist))))) -- cgit v1.2.3