;;;-*- Mode: Lisp; Package: UTILS -*- (in-package utils) ;; Text conversion utilities ;;; ;;; convert-file : converts a file between platforms ;;; (defun resolve-parens (str) "Returns as 1st value the string without the parentheses, as second value the string with the substring in parentheses removed. Only one pair of parentheses is allowed." (let* ((open-par (position #\( str)) (close-par (when open-par (position #\) str :start open-par)))) (if close-par (values (concat-substrings str 0 open-par (1+ open-par) close-par (1+ close-par)) (concat-substrings str 0 open-par (1+ close-par))) str))) (defun convert-char (c &key from-platform to-platform from-string to-string) (let* ((from-string (or from-string (getf $platform-string-plist from-platform))) (to-string (or to-string (getf $platform-string-plist to-platform))) (pos (position c from-string))) (when pos (char to-string pos)))) (defun write-to-stream-sgml (str stream &optional (from-platform :mac)) "from-platform is :mac, :dos or :win. Non-7bit-ascii-chars are replaced by SGML character entities" (unless stream (return-from write-to-stream-sgml (convert-string str from-platform :sgml))) (unless (eq from-platform :mac) (setf str (convert-string str from-platform :mac))) (let ((length (length str))) (labels ((convert (pos) (when (/= pos length) (let ((start pos) entity) (loop with char do (setf char (char str pos) entity (and (not (<= (the fixnum #.(char-code #\A)) (the fixnum (char-code char)) (the fixnum #.(char-code #\z)))) (gethash char *sgml-char-entity-table*))) (incf pos) until (or (= pos length) entity)) (cond ((not entity) (princ (subseq str start) stream)) ((> pos start) (princ (subseq str start (1- pos)) stream) (princ entity stream) (convert pos)) (t (princ entity stream) (convert pos))))))) (convert 0)))) ;(write-to-stream-sgml "S�b�e" t) ;(time (dotimes (i 2000) (convert-string "S�b�e" :mac :sgml))) ;(time (dotimes (i 2000) (write-to-stream-sgml "S�b�e" nil))) ;(time (dotimes (i 2000) (subst-chars-by-substrings "S�b�e" *iso-latin1*))) ;(time (dotimes (i 2000) (convert-string "S�b�e" :mac :dos))) (defun mac-to-win-char (c) (dos-to-win-char (or (mac-to-dos-char c) c))) (defun string-map (fun str) "applies fun successively to each char of string and returnes the destructively modified string" (dotimes (i (length str)) (let ((new-char (funcall fun (char str i)))) (when new-char (setf (char str i) new-char)))) str) ; call this dos-to-mac-string (defun substitute-dos-mac-chars (str) (dotimes (i (length str)) (let* ((dos-c (char str i)) (mac-c (dos-to-mac-char dos-c))) (when mac-c (setf (char str i) mac-c)))) str) (defun substitute-dos-win-chars (str) (dotimes (i (length str)) (let* ((dos-c (char str i)) (win-c (dos-to-win-char dos-c))) (when win-c (setf (char str i) win-c)))) str) (defun substitute-mac-dos-chars (str) (dotimes (i (length str)) (let* ((mac-c (char str i)) (dos-c (mac-to-dos-char mac-c))) (when dos-c (setf (char str i) dos-c)))) str) (defun substitute-win-mac-chars (str) (dotimes (i (length str)) (let* ((win-c (char str i)) (mac-c (win-to-mac-char win-c))) (when mac-c (setf (char str i) mac-c)))) str) (defun substitute-mac-win-chars (str) (dotimes (i (length str)) (let* ((mac-c (char str i)) (win-c (mac-to-win-char mac-c))) (when win-c (setf (char str i) win-c)))) str) ;(subst-chars-by-substrings "S�b�e" *iso-latin1*) (defun filename-name (file) (let ((pos (position #\. file :from-end t))) (if pos (subseq file 0 pos) file))) ;(process-in-files #'print "lisp") (defun process-in-files (f type) (mapcar (lambda (path) (funcall f path)) (directory (make-pathname :directory (pathname-directory (choose-directory-dialog)) :name "*" :type type)))) (defun process-files (f old-type new-type) (mapcar (lambda (path) (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file)) (out-path (make-pathname :directory dir :name fname :type new-type))) (funcall f path out-path))) (directory (make-pathname :directory (choose-directory-dialog) :name "*" :type old-type)))) ;; some file-handling macros (defmacro with-files ((path out-path old-type &optional new-type directory) &body body) "Performs body on all files in directory which have extension old-type. New files with extension new-type are created. When new-type = nil or = old-type, the new files are renamed to the old files. If directory is not specified, the user is prompted for a directory." (let ((dir (gensym)) (rename (gensym))) `(let (,out-path (,rename (when (or (not ,new-type) (string= ,new-type ,old-type)) (symbol-name (gensym)))) (,dir (directory (make-pathname :directory (or ,directory (choose-directory-dialog)) :name "*" :type ,old-type)))) (dolist (,path ,dir) (setf ,out-path (make-pathname :directory (directory-namestring ,path) :name (filename-name (file-namestring ,path)) :type (or ,rename ,new-type))) ,@body (when ,rename (rename-file ,out-path ,path :if-exists :supersede) (delete-file ,out-path)))))) (defmacro with-files-streams ((stream out-stream old-type &optional new-type directory) &body body) (let ((path (gensym)) (out-path (gensym))) `(with-files (,path ,out-path ,old-type ,new-type ,directory) (with-open-file (,stream ,path) (with-open-file (,out-stream ,out-path :direction :output :if-exists :supersede) ,@body))))) (defmacro with-files-lines ((line out-stream old-type &optional new-type directory) &body body) (let ((stream (gensym))) `(with-files-streams (,stream ,out-stream ,old-type ,new-type ,directory) (with-stream-lines (,stream ,line) ,@body)))) (defmacro with-files-chars ((char out-stream old-type &optional new-type directory) &body body) (let ((stream (gensym))) `(with-files-streams (,stream ,out-stream ,old-type ,new-type ,directory) (with-stream-chars (,stream ,char) ,@body)))) (defmacro with-file-chars ((line path) &body body) (let ((stream (gensym))) `(with-open-file (,stream ,path) (with-stream-chars (,stream ,line) ,@body)))) (defun process-file-streams (fun old-type new-type) (process-files #'(lambda (path out-path) (format t "file: ~a~%" path) (with-open-file (stream path :direction :input) (with-open-file (out-stream out-path :direction :output :if-exists :supersede) ;(let ((out-stream t)) out-path ; to avoid warning (funcall fun stream out-stream)))) old-type new-type)) (defun process-in-file-streams (fun type) (process-in-files #'(lambda (path) (format t "file: ~a~%" path) (with-open-file (stream path :direction :input) (funcall fun stream))) type)) (defun process-file (f &optional path out-path) (unless path (setf path (choose-file-dialog))) (unless out-path (setf out-path (choose-new-file-dialog))) (with-open-file (stream path :direction :input) (with-open-file (out-stream out-path :direction :output :if-exists :supersede) ;(let ((out-stream t)) out-path ; to avoid warning (funcall f stream out-stream)))) ;(substitute-dos-mac-chars "b�ter i sj�en - son r�le se borne � l") ;(convert-file :from-platform :win :to-platform :mac) ;(convert-file :from-platform :mac :to-platform :dos) ; ??? not tested (defmacro with-stream-chars ((stream char) &body body) `(multiple-value-bind (reader-function value) (stream-reader ,stream) ;(locally (declare (optimize (speed 3))) (loop for ,char = (funcall reader-function value) while ,char do ,@body))) (defun convert-file (&key (path (choose-file-dialog)) out-path new-type (from-platform :mac) to-platform delete-nl/eof) (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type (string-downcase (string to-platform))))))) (process-file #'(lambda (stream out-stream) (with-stream-lines (stream line) (when delete-nl/eof (setf line (delete #\newline (delete #\^z line)))) (case to-platform (:sgml (write-to-stream-sgml line out-stream from-platform) (terpri out-stream)) (t (setf line (convert-string line from-platform to-platform)) (format out-stream "~a~%" line))))) path out-path)) ;(convert-file :to-platform :sgml :new-type "win") ;(convert-file :from-platform :unix :to-platform :mac :new-type "mac") (defun dos-to-mac-file (&key (path (choose-file-dialog)) out-path new-type) "converts dos- to mac-characters" (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type "mac"))))) (process-file (lambda (stream out-stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (setf line (delete #\newline (delete #\^z line)) line (substitute-dos-mac-chars line)) (format out-stream "~a~%" line)) (format out-stream "~%" line))) path out-path)) (defun mac-to-dos-file (&key (path (choose-file-dialog)) out-path new-type) "converts dos- to mac-characters" (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type "txt"))))) (process-file (lambda (stream out-stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (setf line (substitute-mac-dos-chars line)) (format out-stream "~a~%" line)) (format out-stream "~%" line))) path out-path)) (defun mac-to-win-file (&key (path (choose-file-dialog)) out-path new-type) "converts dos- to mac-characters" (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type "win"))))) (process-file (lambda (stream out-stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (setf line (delete #\newline (delete #\^z line)) line (substitute-mac-win-chars line)) (format out-stream "~a~%" line)))) path out-path)) ;(mac-to-win-file) (defun excerpt-lines-from-file (predicate &key (path (choose-file-dialog)) out-path new-type) "excerpt lines on which predicate returnes true" (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type "exc"))))) (process-file #'(lambda (stream out-stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (when (funcall predicate line) (format out-stream "~a~%" line))))) path out-path)) ;(directory (choose-directory-dialog) :name "*") (defun extract-from-files (fun &key (type "txt") out-name (out-type "txt")) (let* ((dir-path (choose-directory-dialog)) (host (pathname-host dir-path)) (dir (pathname-directory dir-path)) (files (directory (make-pathname :host host :directory dir :name "*" :type type))) (out-path (make-pathname :host host :directory dir :name out-name :type out-type))) (with-open-file (out-stream out-path :direction :output :if-exists :supersede) (dolist (file files) (with-open-file (stream file :direction :input) (funcall fun stream out-stream)))))) (defun extract-lines-from-files (fun &key (type "txt") out-name (out-type "txt")) (extract-from-files #'(lambda (stream out-stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (funcall fun line out-stream)))) :type type :out-name out-name :out-type out-type)) (defun do-files-lines (fun &key (type "txt")) (process-in-file-streams #'(lambda (stream) (let (line) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (funcall fun line)))) type)) (defun merge-files (type) (let ((out-file (choose-new-file-dialog))) (process-in-files #'(lambda (file) (print file) (with-open-file (stream file :direction :input) (with-open-file (out-stream out-file :direction :output :if-exists :append :if-does-not-exist :create) (loop with line do (setf line (read-line stream nil :eof)) until (eq line :eof) do (format out-stream "~a~%" line))))) type))) (defun sort-pointer (vec &optional (predicate #'string<) (l 0) (r (1- (length vec)))) (let ((pointer-vec (make-array (- r l -1) :element-type 'fixnum))) (loop for i from l to r do (setf (aref pointer-vec (- i l)) i)) (sort pointer-vec predicate :key #'(lambda (i) (aref vec i))) pointer-vec)) ; should be refined! (remove-if ...) #+dict (defun sort-file (&key (path (choose-file-dialog)) out-path new-type dos-to-mac reverse count-occurences skip remove-chars strip-fun trim-whitespace) "sorts lines of a file" (unless out-path (let* ((dir (directory-namestring path)) (file (file-namestring path)) (fname (filename-name file))) (setf out-path (make-pathname :directory dir :name fname :type (or new-type (if reverse "rev" "srt")))))) (unless strip-fun (setf strip-fun (if dos-to-mac (lambda (line) (substitute-dos-mac-chars (remove-if #'(lambda (c) (find c remove-chars)) line))) (lambda (line) (remove-if #'(lambda (c) (find c remove-chars)) line))))) (let ((line-array (make-array 2000 :element-type 'simple-string :initial-element "" :fill-pointer 0 :adjustable t)) (index-array (make-array 2000 :element-type 'simple-string :initial-element "" :fill-pointer 0 :adjustable t))) (with-open-file (stream path :direction :input) (let (line (counter 0)) (loop do (setf line (read-line stream nil :eof)) until (eq line :eof) do (incf counter) (when (zerop (mod counter 1000)) (print counter)) (when dos-to-mac (setf line (delete #\ line))) (when trim-whitespace (setf line (string-trim " " line))) (let ((sort-str (funcall strip-fun line))) (vector-push-extend (if reverse (reverse sort-str) sort-str) index-array)) (vector-push-extend line line-array)))) (print (fill-pointer index-array)) (let ((sorted-lines (sort-pointer index-array #'(lambda (str1 str2) (lang-string-lessp str1 str2 :skip-ch skip :subst-str nil :sort-end-char #\; :language dict::$langNorwegian))))) (with-open-file (stream out-path :direction :output :if-exists :supersede) (let ((counter 0)) (print (fill-pointer index-array)) (if count-occurences (let ((prev-line "") (number-of-occ 1)) (dotimes (i (fill-pointer index-array)) (incf counter) (when (zerop (mod counter 1000)) (print counter)) (let ((line (aref line-array (aref sorted-lines i)))) (cond ((zerop (length line)) nil) ((string= prev-line line) (incf number-of-occ)) (t (format stream "~4,d ~a~%" number-of-occ prev-line) (setf number-of-occ 1 prev-line line))))) (format stream "~4,d ~a~%" number-of-occ prev-line)) (dotimes (i (fill-pointer index-array)) (incf counter) (when (zerop (mod counter 1000)) (print counter)) (let ((line (aref line-array (aref sorted-lines i)))) (unless (zerop (length line)) (format stream "~a~%" line)))))))))) (defun find-max-list (lists &optional found (length 0)) (if lists (let ((new-length (length (car lists)))) (if (> new-length length) (find-max-list (cdr lists) (car lists) new-length) (find-max-list (cdr lists) found length))) found))