;;;-*- Mode: Lisp; Package: (REGEXP) -*-

;;; Scratch file: interactive test expressions for the regexp compiler.
;;;
;;; Every experiment below sits inside a #| ... |# block comment, so loading
;;; this file evaluates nothing except the REQUIRE form.  To try an
;;; expression, evaluate it by hand from the editor/listener.
;;; NOTE(review): the functions used here (compile-regexp, tokenize-string,
;;; cfg-chart-parse, substring-match, ...) are defined elsewhere; presumably
;;; in the module loaded by the REQUIRE below -- confirm.

#+mcl(ccl:require :regexp-fsa)

;;; --- Parsing / tokenizing / compiling against *regexp-chart* ---
#|
(cfg-chart-parse "(d)" :chart *regexp-chart*)
(cfg-chart-parse "(woll){3}" :chart *regexp-chart*)
(cfg-chart-parse "(((woll){3,5}|)?[1a-rt]+o\\+ho)*" :chart *regexp-chart* :goal 'expression)
(cfg-chart-parse "((wollwutz)|oho)*" :chart *regexp-chart*)
(cfg-chart-parse "(((woll)|rigus)?lull+oho)*" :chart *regexp-chart*)
(tokenize-string "(((woll){3,5}|<@rigus>)?[^1a-rt]+o\\+ho)*" *regexp-chart*)
(tokenize-string "(<@num>)?" *regexp-chart*)
(tokenize-string "(num)?" *regexp-chart*)
(defparameter *fsa* (compile-regexp "(((woll)|rigus)?lull+oho)*" :chart *regexp-chart*))
(defparameter *fsa* (compile-regexp "(((woll){3,5}|)?[1a-rt]+o\\+ho)*" :chart *regexp-chart*))
|#

;;; --- Compiling an already-parsed regexp tree ---
#|
(dfa-compile-parsed-regexp
 '(:seq (:or : (:seq : (:? "n") "ing")) (:* (:seq (:? (:or "s" "e")) (:or "" (:seq "" (:? "n") "ing")))))
 (make-instance 'regexp-fsa))
|#

;;; --- Date-like pattern, compile timing ---
#|
(time (defparameter *fsa* (compile-regexp " [01]?[0-9]\\.([01]?[0-9]\\.|(jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|des)\\.?)(19)?[0-9][0-9][ \\.\\,\\;\\:]")))
|#

;;; --- Determinization and matching timings ---
#|
(defparameter *fsa* (any-determinize (compile-regexp "(.*|wu)tz")))
(fsa::fsa-print *fsa*)
(in-language-p "atz" *fsa*)
(in-language-p "wollalulflloho" *fsa*)

(defparameter *fsa* (compile-regexp "(jaja|trull)+"))
; 600, 378, 367, 355, 450, class state: 493, more than 1 final-state: 645
; dfa: 273; 340; lw: 120
(time (dotimes ( i 100) (substring-match *fsa* "wollwutzriguswollwutzwollwutzwollwutzwollwutzrigusrigus jaja " :minimal nil)))
|#

;;; --- Character-class complement ("-[...]") experiments ---
;;; This section was previously not inside a block comment.  It uses *fsa*
;;; before any live definition and contains raw regexp notes that are not
;;; Lisp forms, so it is now commented out like the other scratch sections.
#|
(tokenize-string "(a-[bjjjjjjc-d])" *regexp-chart*)
(compile-regexp "(a-[bc-d])")
(fsa::fsa-print (compile-regexp "(a-[csadsf])"))
(fsa::fsa-print (compile-regexp "\\*\\<"))

Raw regexp notes (plain text, not Lisp):

\>(-[\<])*\((-[\<])*/(-[\<])+/(-[\<])*\)\.*\<
\((-[\)])*/(-[\)])+/.*\)
\-[<]*(Q|W|R|Y|U|I|O|A|D|F|J|L|V|B|N|M|f|ä|ö|ü|ß|Ä|Ö|Ü|h)-[<]*\<
\((-[/\(\)])*/(-[/\(\)])+/(-[/\(\)])*\)
\-[<]*\(G -[<]*\<

(fsa::fsa-print *fsa*)
(setf *fsa* (compile-regexp "\\((-[\\)])*/(-[\\)])*/.*\\)"))
(setf *fsa*
      (compile-regexp "\\((-[/()])*/(-[/()])+/(-[/()])*\\)"))
(substring-match *fsa* "eba (Hmarobs; /mo/iHmars)," :minimal nil)
|#

;;; --- Parser timing notes and assorted compile/match experiments ---
#|
; 54 ms with big grammar and intermediate tree build
; 31 ms with smaller grammar and no tree build
; 17 ms without extended-lists
; 11 ms in reversed order
; 20 ms with one-char-strings
(cfg-chart-parse "(((woll){3,5}|)?[1a-rt]+o\\+ho)*" :chart *regexp-chart* :goal 'expression)
(tokenize-string "(((woll){3,5}|)?[1a-rt]+o\\+ho)*" *regexp-chart*) ; 27 ms

(print-fsa (compile-regexp "(((woll){3,5}|)?[1a-rt]+o\\+ho)*"))
(fsa::fsa-print (compile-regexp "(((woll)|rigus)?.1a\\-rt+o\\+ho)*"))
(print-fsa (compile-regexp "((woll|)?rt+o\\+ho)*"))
(fsa::fsa-print (compile-regexp "woll*" :chart *regexp-chart*))

(defparameter *fsa* (compile-regexp "(woll.?)+"))
(fsa::fsa-print *fsa*)

(defparameter *fsa* (compile-regexp "[a-zA-Z]+.3\\*.?"))
(time (dotimes (i 100) (string-match *fsa* "aaaaaahadddddd43*r")))
(get-matching-substring *fsa* "aaaaaahadddddd3*")
(string-match *fsa* "aaaaaahadddddd43*r")
|#

;(fsa::fsa-print *fsa*)
;(fsa::fsa-print (any-determinize *fsa*))

;;; --- Substring matching experiments ---
#|
(defparameter *fsa* (compile-regexp "\\((~|¢).*; ?(~|¢)[a-z]*\\)"))
(string-match *fsa* "(~a;~x)" :exact nil :minimal nil)
(substring-match *fsa* "(~a;~b)" :minimal nil)
(inspect *regexp-chart*)

(defparameter *fsa* (compile-regexp "(jaja|trull)+"))
(fsa::fsa-print *fsa*)
; 600, 378, 367, 355, 392, 480; dfa: 311; lw: 140
(time (dotimes ( i 1000) (substring-match *fsa* "wollwutzriguswollwutzwollwutzwollwutzwollwutzrigusrigus jaja " :minimal nil)))

(defparameter *fsa* (compile-regexp "(woll.?|wutz[i4-9o])+"))
(substring-match *fsa* "ja diese wollowollwollwutz5rigus wolllllllllllllelein oho")

(time (defparameter *fsa* (compile-regexp " [01]?[0-9]\\.([01]?[0-9]\\.|(jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|des)\\.?)(19)?[0-9][0-9][ \\.\\,\\;\\:]")))
(tokenize-string "[a-zA-Z]+.3\\*" *regexp-chart*)
(tokenize-string " 
[01]?[0-9]\\.([01]?[0-9]\\.|(jan|feb|mar|apr|mai|jun|jul|aug|sep|okt|nov|des)\\.?)(19)?[0-9][0-9][ \\.\\,\\;\\:]" *regexp-chart*)
(get-matching-substring *fsa* "aaaaaahadddddd3*45*")
(substring-match *fsa* "aaaaaahadddddd3*")
(fsa::fsa-print *fsa*)
(get-matching-substring *fsa* "am 1.feb.1998 da hat das Wollwutzerl Geburtstag.")
|#