;; -*- Mode: lisp; Syntax: ansi-common-lisp; Package: DATA-STRUCTURES; Base: 10 -*- ;; ;; Copyright (C) Paul Meurer 2001. All rights reserved. ;; paul.meurer@hit.uib.no ;; HIT-centre, University of Bergen ;; ;; set implementation based on alphabetic trees ;; ;; Version 0.1 ;; ;;------------------------------------------------------------------------------------- ;; TO DO: merge code with string-net ;;------------------------------------------------------------------------------------- (in-package :dat) (defun make-tree-set () (list 0)) (defun insert-char (char-node node) (let ((char (car char-node))) (labels ((insert (branch) (cond ((null (cdr branch)) (setf (cdr branch) (list char-node))) ((null (cadr branch)) (insert (cdr branch))) ((char< (caadr branch) char) (insert (cdr branch))) (t (push char-node (cdr branch)))))) (insert node)))) (defmethod add-string ((tree list) string) "adds a string to a string tree" (let ((length (length string))) (labels ((add-char (pos node) (declare (fixnum pos n)) (if (= pos length) (unless (member nil (cdr node)) (push nil (cdr node)) ; end marker (incf (car tree))) (let* ((char (char string pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (let ((new-node (list char))) (insert-char new-node node) #+non-ordered (push new-node (cdr node)) (add-char (the fixnum (1+ pos)) new-node))) (t (add-char (the fixnum (1+ pos)) sub-node))))))) (add-char 0 tree) ;;(setf list-tree (cdr ltree)) string))) (defun copy-tree-set (tree-set) (copy-tree tree-set)) (defun tree-set-length (tree-set) (car tree-set)) ;; compares two tree-sets (defun tree-set< (tree1 tree2) (labels ((walk (node1 node2) (if (or (numberp (car node1)) (eq (car node1) (car node2))) (mapc #'walk (cdr node1) (cdr node2)) (return-from tree-set< (cond ((null node1) t) ((null node2) nil) (t (char< (car node1) (car node2)))))))) (walk tree1 tree2) :equal)) #+test (do-tree-set (string '(4 (#\d (#\e (#\e (#\p (#\- (#\s (#\e (#\a (#\t (#\e (#\d nil))))))) (#\s (#\e (#\t nil))))))) (#\n (#\e (#\e (#\d (#\f (#\u (#\l nil))))))) (#\s (#\t (#\e (#\r (#\n nil))))))) (print string)) #+test (tree-sets-intersect-p '(4 (#\d (#\e (#\e (#\p nil)))) (#\f (#\u (#\l (#\l nil)))) (#\p (#\r (#\o (#\f (#\o (#\u (#\n (#\d nil)))))))) (#\t (#\h (#\o (#\r (#\o (#\u (#\g (#\h nil))))))))) '(4 (#\d (#\e (#\e (#\p (#\- (#\s (#\e (#\a (#\t (#\e (#\d nil))))))) (#\s (#\e (#\t nil))))))) (#\n (#\e (#\e (#\d (#\f (#\u (#\l nil))))))) (#\s (#\t (#\e (#\r (#\n nil))))))) #+test (tree-set< '(4 (#\d (#\e (#\e (#\p nil)))) (#\f (#\u (#\l (#\l nil)))) (#\p (#\r (#\o (#\f (#\o (#\u (#\n (#\d nil)))))))) (#\t (#\h (#\o (#\r (#\o (#\u (#\g (#\h nil))))))))) '(4 (#\d (#\e (#\e (#\p (#\- (#\s (#\e (#\a (#\t (#\e (#\d nil))))))) (#\s (#\e (#\t nil))))))) (#\n (#\e (#\e (#\d (#\f (#\u (#\l nil))))))) (#\s (#\t (#\e (#\r (#\n nil))))))) (defun string-in-tree-p (tree string &optional (start 0)) (let ((length (length string))) (labels ((find-string (pos node) (declare (fixnum pos n)) (if (= pos length) (when (member nil (cdr node)) (return-from string-in-tree-p t)) (let* ((char (char string pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (return-from string-in-tree-p nil)) (t (find-string (the fixnum (1+ pos)) sub-node))))))) (find-string start tree) nil))) (defmacro do-tree-set ((string tree-set) &body body) (let ((node (gensym))) `(let ((,string (make-array 0 :element-type #-mcl 'character #+mcl 'base-character :adjustable t :fill-pointer t))) (declare (string string)) (labels ((walk (,node) (cond ((null ,node) (let ((,string (copy-seq ,string))) ,@body)) (t (vector-push-extend (car ,node) ,string) (mapc #'walk (cdr ,node)) (decf (fill-pointer ,string)))))) (mapc #'walk (cdr ,tree-set))) nil))) (defun %minimal-nodes (node-list) (loop with min-node = nil for nodes on node-list do (cond ((null (car nodes)) (loop for nodes on node-list when (null (car nodes)) do (setf (car nodes) t)) (return nil)) ((eq (car nodes) t) nil) ((null min-node) (setf min-node nodes)) ((char< (caar nodes) (caar min-node)) (setf min-node nodes)) (t nil)) finally (return (cond ((null min-node) t) ((null (car min-node)) nil) (t (let ((char (caar min-node))) (loop for nodes on node-list when (and (not (eq (car nodes) t)) (eq (caar nodes) char)) collect (prog1 (car nodes) (setf (car nodes) t))))))))) ;; walk the set union ; (somewhat clumsy) (defmacro do-tree-sets ((string tree-sets) &body body) (let ((nodes (gensym)) (minimal-nodes (gensym)) (node (gensym))) `(let ((,string (make-array 0 :element-type #-mcl 'character #+mcl 'base-character :adjustable t :fill-pointer t))) (declare (string string)) (labels ((walk (,nodes) (let ((,minimal-nodes (%minimal-nodes ,nodes))) (cond ((eq ,minimal-nodes t) nil) ((null ,minimal-nodes) (let ((,string (copy-seq ,string))) ,@body) (walk ,nodes)) (t (vector-push-extend (caar ,minimal-nodes) ,string) (walk (collecting (dolist (,node ,minimal-nodes) (dolist (,node (cdr ,node)) (collect ,node))))) (decf (fill-pointer ,string)) (walk ,nodes)))))) (walk (collecting (dolist (,node ,tree-sets) (dolist (,node (cdr ,node)) (collect ,node)))))) nil))) #+alternative (defun tree-set-intersection (tree1 tree2) (let ((intersection (make-tree-set))) (labels ((walk (branch1 branch2) (collecting (dolist (node1 branch1) (dolist (node2 branch2) (cond ((and (null node1) (null node2)) (collect nil) (incf (car intersection))) ((eq (car node1) (car node2)) (let ((branch (walk (cdr node1) (cdr node2)))) (when branch (collect (cons (car node1) branch))))) (t nil))))))) (setf (cdr intersection) (walk (cdr tree1) (cdr tree2))) intersection))) ;; still better (defun tree-set-intersection (tree1 tree2) (let ((intersection (make-tree-set))) (labels ((walk (branch1 branch2) (collecting (loop (cond ((or (null branch1) (null branch2)) (return)) ((and (null (car branch1)) (null (car branch2))) (collect nil) (setf branch1 (cdr branch1) branch2 (cdr branch2)) (incf (car intersection))) ((null (car branch1)) (setf branch1 (cdr branch1))) ((null (car branch2)) (setf branch2 (cdr branch2))) ((char= (caar branch1) (caar branch2)) (let ((branch (walk (cdar branch1) (cdar branch2)))) (when branch (collect (cons (caar branch1) branch)))) (setf branch1 (cdr branch1) branch2 (cdr branch2))) ((char< (caar branch1) (caar branch2)) (setf branch1 (cdr branch1))) ((char> (caar branch1) (caar branch2)) (setf branch2 (cdr branch2))) (t (error "Haha"))))))) (setf (cdr intersection) (walk (cdr tree1) (cdr tree2))) intersection))) (defun tree-sets-intersect-p (tree1 tree2) (labels ((walk (branch1 branch2) (loop (cond ((or (null branch1) (null branch2)) (return)) ((and (null (car branch1)) (null (car branch2))) (return-from tree-sets-intersect-p t)) ((null (car branch1)) (setf branch1 (cdr branch1))) ((null (car branch2)) (setf branch2 (cdr branch2))) ((char= (caar branch1) (caar branch2)) (walk (cdar branch1) (cdar branch2)) (setf branch1 (cdr branch1) branch2 (cdr branch2))) ((char< (caar branch1) (caar branch2)) (setf branch1 (cdr branch1))) (t (setf branch2 (cdr branch2))))))) (walk (cdr tree1) (cdr tree2)) nil)) #+alternative (defun tree-sets-intersect-p (tree1 tree2) (labels ((walk (branch1 branch2) (dolist (node1 branch1) (dolist (node2 branch2) (cond ((and (null node1) (null node2)) (return-from tree-sets-intersect-p t)) ((eq (car node1) (car node2)) (walk (cdr node1) (cdr node2))) (t nil)))))) (walk (cdr tree1) (cdr tree2))) nil) #+test (let ((tree1 (make-tree-set)) (tree2 (make-tree-set))) (add-string tree1 "") (add-string tree1 "aadf") (add-string tree2 "aadf") (add-string tree1 "asdf") (add-string tree2 "asef") (add-string tree1 "assf") (add-string tree1 "ssf") (add-string tree1 "assf") (add-string tree2 "assf") (add-string tree2 "assf") (add-string tree1 "assff") (add-string tree2 "assff") (tree-set< tree1 tree1) (string-in-tree-p tree2 "asef") ;;(do-tree-set (string tree1) (print string)) ;;(do-tree-sets (string (list tree1 tree2)) (print string)) (do-tree-set (string (print (tree-set-intersection tree1 tree2))) (print string)) (tree-sets-intersect-p tree1 tree2)) ;; tree-set implementation of string-sets (defun make-string-set (&optional strings) (let ((ts (make-tree-set))) (dolist (str strings) (add-string ts str)) ts)) (defun copy-string-set (string-set) (copy-tree-set string-set)) (defun string-set-length (string-set) (tree-set-length string-set)) (defun string-set< (i1 i2) (tree-set< i1 i2)) #+test (defun string-set< (i1 i2) (do-tree-set (string1 i1) (do-tree-set (string2 i2) (let ((order (funcall #'string-order-fn string1 string2))) (unless (eq order :equal) (return-from string-set< order))))) :equal) (defun string-in-set-p (string string-set) (string-in-tree-p string-set string)) (defun add-string-to-set (string string-set) (add-string string-set string)) (defmacro do-string-set ((string-var string-set) &body body) `(do-tree-set (,string-var ,string-set) ,@body)) (defmacro do-string-sets ((string-var string-sets) &body body) `(do-tree-sets (,string-var ,string-sets) ,@body)) ;; nondestructive (defun string-set-intersection (i1 i2) (tree-set-intersection i1 i2)) (defun string-sets-intersect-p (i1 i2) (tree-sets-intersect-p i1 i2)) #+test (let* ((strings1 '("asdf" "sdflkj" "werlkjer" "sdkjf")) (strings2 '("asdf" "ert" "lkjasdflkaj" "dskfj" "sdkjf")) (tree1 (make-string-set strings1)) (tree2 (make-string-set strings2))) (do-tree-set (string tree1) (print string)) (print "-") (do-tree-set (string tree2) (print string)) (print "-") (do-tree-set (string (tree-set-intersection tree1 tree2)) (print string))) ;;; string trees (defun make-string-tree () (list 0)) (defun insert-string-tree-char (char-node node) (let ((char (car char-node))) (labels ((insert (branch) (cond ((null (cdr branch)) (setf (cdr branch) (list char-node))) ((eq :val (caadr branch)) (insert (cdr branch))) ((char< (caadr branch) char) (insert (cdr branch))) (t (push char-node (cdr branch)))))) (insert node)))) ;; When match is total, returns value (content of value-cell) and T as second value. ;; When match is partial, returns default as value, NIL as second value and (defun string-tree-get (tree string &optional default (start 0) (end (length string))) (labels ((find-string (pos node) (declare (fixnum pos n end)) (if (= pos end) (let ((value-cell (find :val (cdr node) :key #'car))) (return-from string-tree-get (values (or (cdr value-cell) default) (not (not value-cell)) pos))) (let* ((char (char string pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (return-from string-tree-get (values default nil pos (cdr node)))) (t (find-string (the fixnum (1+ pos)) sub-node))))))) (find-string start tree) default)) (defun string-tree-put (tree string value &optional default (start 0) (end (length string))) (declare (optimize (speed 3) (safety 1)) (string string)) (labels ((add-char (pos node) (declare (fixnum pos end)) (if (= pos end) (let ((val-cell (find :val (cdr node) :key #'car))) (unless val-cell (incf (car tree)) (setf val-cell (cons :val default)) (push val-cell (cdr node))) (setf (cdr val-cell) value)) (let* ((char (char string pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (let ((new-node (list char))) (insert-string-tree-char new-node node) #+non-ordered(push new-node (cdr node)) (add-char (the fixnum (1+ pos)) new-node))) (t (add-char (the fixnum (1+ pos)) sub-node))))))) (add-char start tree) ;;(setf list-tree (cdr ltree)) ;; string ;; wrong! value)) (defun string-tree-get-list (tree string-list &optional default (divider #\:)) (labels ((find-string (pos list node) (declare (fixnum pos n end)) (cond ((null list) ;; (= pos end) (let ((value-cell (find :val (cdr node) :key #'car))) (return-from string-tree-get-list (values (or (cdr value-cell) default) (not (not value-cell)))))) ((< pos (length (car list))) (let* ((char (char (car list) pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (return-from string-tree-get-list default)) (t (find-string (the fixnum (1+ pos)) list sub-node))))) ((cdr list) (let ((sub-node (find divider (cdr node) :key #'car))) (cond ((null sub-node) (return-from string-tree-get-list default)) (t (find-string 0 (cdr list) sub-node))))) (t (find-string 0 () node))))) (find-string 0 string-list tree) default)) (defun string-tree-put-list (tree string-list value &optional default (divider #\:)) (declare (optimize (speed 3) (safety 1)) (string string)) (labels ((add-char (pos list node) (declare (fixnum pos end)) (cond ((null list) (let ((val-cell (find :val (cdr node) :key #'car))) (unless val-cell (incf (car tree)) (setf val-cell (cons :val default)) (push val-cell (cdr node))) (setf (cdr val-cell) value))) ((< pos (length (car list))) (let* ((char (char (car list) pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) (let ((new-node (list char))) (insert-string-tree-char new-node node) #+non-ordered(push new-node (cdr node)) (add-char (the fixnum (1+ pos)) list new-node))) (t (add-char (the fixnum (1+ pos)) list sub-node))))) ((cdr list) (let* ((sub-node (find divider (cdr node) :key #'car))) (cond ((null sub-node) (let ((new-node (list divider))) (insert-string-tree-char new-node node) #+non-ordered(push new-node (cdr node)) (add-char 0 (cdr list) new-node))) (t (add-char 0 (cdr list) sub-node))))) (t (add-char 0 () node))))) (add-char 0 string-list tree) ;; string-list ;; wrong!! value)) (defsetf string-tree-get-list (tree string &optional default divider) (value) `(string-tree-put-list ,tree ,string ,value ,default (or ,divider #\:))) ;; removes a key (STRING) value pair, if STRICT-P is t, or ;; the subtree starting with STRING, if STRICT-P is nil (defun string-tree-remove (tree string &optional (strict-p t)) (declare (optimize (speed 3) (safety 1)) ;;(ignore strict-p) (string string)) (when (string= string "") ;; remove everything (setf (cdr tree) nil (car tree) 0) (return-from string-tree-remove tree)) (let ((start 0) (end (length string)) (count 0)) (labels ((count-leaves (branch) (if (eq (car branch) :val) (incf count) (mapc #'count-leaves (cdr branch)))) (remove-char (pos node) (declare (fixnum pos end)) (if (= pos end) (let ((val-cell (find :val (cdr node) :key #'car))) (cond ((not strict-p) (count-leaves node) (decf (car tree) count) t) (val-cell (decf (car tree)) (setf (cdr node) (delete val-cell (cdr node))) (not (cdr node))) (t nil))) (let* ((char (char string pos)) (sub-node (find char (cdr node) :key #'car))) (cond ((null sub-node) nil) ((remove-char (the fixnum (1+ pos)) sub-node) (setf (cdr node) (delete sub-node (cdr node))) (not (cdr node))) (t nil)))))) (remove-char start tree) tree))) (defsetf string-tree-get (tree string &optional default start end) (value) `(string-tree-put ,tree ,string ,value ,default (or ,start 0) (or ,end (length ,string)))) (defmacro do-string-tree ((string value tree-set &key prefix) &body body) (let ((node (gensym)) (prefix-length (gensym))) `(let ((,string (make-array 0 :element-type #-mcl 'character #+mcl 'base-character :adjustable t :fill-pointer t)) (,prefix-length (when ,prefix (length ,prefix)))) (declare (string ,string)) (labels ((walk (,node) (cond ((eq (car ,node) :val) (let ((,string (copy-seq ,string)) (,value (cdr ,node))) ,@body)) ((and ,prefix (> ,prefix-length (fill-pointer ,string)) (char/= (car ,node) (char ,prefix (fill-pointer ,string)))) nil) (t (vector-push-extend (car ,node) ,string) (mapc #'walk (cdr ,node)) (decf (fill-pointer ,string)))))) (mapc #'walk (cdr ,tree-set))) nil))) (defun string-tree-count (tree) (car tree)) (defun store-string-tree (tree file &optional (key-end-marker #\#)) (with-open-file (stream file :direction :output :if-exists :supersede) (write-char key-end-marker stream) (terpri stream) (do-string-tree (string value tree) (write-string string stream) (write-char key-end-marker stream) (format stream "~a~%" value)))) (defun load-string-tree (file &optional tree) (let ((tree (or tree (make-string-tree)))) (with-open-file (stream file) (let ((key-end-marker (char (read-line stream) 0))) (loop for line = (read-line stream nil nil) while line do (destructuring-bind (key value) (split line key-end-marker) (setf (string-tree-get tree key) value))))) tree)) (defclass tree-table () ((tree :initform (list 0) :initarg :tree :accessor tree))) (defclass string-tree (tree-table) ()) #+test (print (let ((tree (make-string-tree))) (string-tree-put tree "aasdf" 2 nil 1 3) (incf (string-tree-get tree "asdf" 0) 3) (string-tree-put tree "agdf" 3) (string-tree-put tree "arsdf" 5) (string-tree-get tree "asdf" 0) (incf (string-tree-get tree "asdl" 4) 3) (incf (string-tree-get tree "as" 4) 3) (print (string-tree-count tree)) (do-string-tree (string value tree) (print (list string value))) (print (car (string-tree-remove tree "as" nil))) (do-string-tree (string value tree) (print (list string value))) tree)) #+test (let ((tree (make-string-tree))) (string-tree-put tree "aasdf" 2 nil) (multiple-value-bind (value value-p match-length) (string-tree-get tree "aasdf" 0) (print (list :value value :value-p value-p :match-length match-length)))) ;;; EOF