词法分析器
词法分析先修-表达式求值
求一个包含加减乘除乘方括号的表达式值,要求考虑运算优先级
比如1+2*3-(4+5)*6
迪杰斯特拉双栈法
双栈,一个用于存放操作数stack<double> operands
,一个用于存放运算符stack<char> operaotors
运算优先级通过给每个运算符定义栈内外优先级来体现:
教材给出的优先级定义是:
icp,栈外优先级
isp,栈内优先级
优先级
(
+-
*/
)
栈内优先级isp
1
3
5
8
栈外优先级icp
8
2
4
1
双栈算法
对于一个操作数,直接压入operands栈
对于一个运算符c,
1.如果其栈外优先级比符号栈栈顶运算符的栈内优先级要高,则c入符号栈,即
1 2 3 if (operators.empty()||icp(c)>isp(operators.top())){ operators.push(c); }
2.如果其栈外优先级比符号栈栈顶优先级低,则首先要符号栈弹栈,每次弹出一个符号,顺带弹出两个操作数栈的数,根据该符号对这两个操作数进行运算之后将运算结果重新压入操作数栈.如此直到符号栈顶的栈内优先级小于c的栈外优先级时,c压栈
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 if (!operators.empty()&&icp(c)<isp(operators.top())){ while (!operators.empty()&&icp(c)<isp(operators.top())){ op=operators.top(); operators.pop(); if (operands.size()>=2 ){ first=operands.top(); operands.pop(); second=operands.top(); operands.pop(); operands.push(basic_operate(op,first,second)); } else error } if (operators.empty()||icp(c)>isp(operators.top())){ operators.push(c); } else { operators.pop(); } }
3.如果符号栈顶的栈内优先级等于c的栈外优先级,直接符号栈退栈一个,c也一并扔掉.
如果2中发生了符号栈退栈到某时,符号栈顶的栈内优先级等于c的栈外优先级,也是将栈顶和c都扬了
这是因为遇到了括号
1 2 3 if (icp(c)==isp(operators.top())){ operators.pop() }
完整代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 #include <iostream> #include <stack> #include <algorithm> #include <string> #include <vector> using namespace std ; int Invalid;int icp (const char &c) { int priority=0 ; switch (c){ case '+' :case '-' : priority=2 ; break ; case '*' :case '/' : priority=4 ; break ; case ')' : priority=1 ; break ; case '(' : priority=8 ; } return priority; } int isp (const char &c) { int priority=0 ; switch (c){ case '+' :case '-' : priority=3 ; break ; case '*' :case '/' : priority=5 ; break ; case ')' : priority=8 ; break ; case '(' : priority=1 ; } return priority; } stack <char > operators;stack <double > operands;double basic_operate (const char &op,const double &first,const double &second) { double result=0 ; switch (op){ case '+' : result=first+second; break ; case '-' : result=first-second; break ; case '*' : result=first*second; break ; case '/' : result=first/second; break ; } return result; } string removeBlank (const string &str) { string exp ; for (auto c:str){ if (c!=' ' ){ exp +=c; } } return exp ; } bool isDigit (const char &c) { return c>='0' &&c<='9' ||c=='.' ; } bool isValidOperator (const char &c) { switch (c){ case '+' :case '-' :case '*' :case '/' : case '(' :case ')' : return true ; } return false ; } bool canPushOperator (const char &c) { if (operators.empty()||icp(c)>isp(operators.top())){ return true ; } return false ; } bool matchBracket (const char &c) { return !operators.empty()&&isp(operators.top())==icp(c); } bool getTwoOperands (double &first,double &second) { if (operands.size()<2 ){ Invalid=1 ; return false ; } first=operands.top(); operands.pop(); second=operands.top(); operands.pop(); return true ; } double aftermath () { double first,second; char op; while (!operators.empty()){ op=operators.top(); operators.pop(); if (!getTwoOperands(first,second)){ Invalid=1 ; return 0 ; } operands.push(basic_operate(op,first,second)); } return operands.top(); } void error (const string &err) { cerr <<"error:" <<err<<endl ; } double evalExpression (const string &expression) { string buffer; double digit; bool digit_flag=0 ; double first,second; char op; double result; int length=expression.length(); char c; for (int i=0 ;i<length;++i){ c=expression[i]; if (isDigit(c)){ if (digit_flag==false ){ digit_flag=true ; buffer=c; } else buffer+=c; cout <<"actived" <<endl ; continue ; } else if (!isValidOperator(c)){ error("inValidOperator" ); Invalid=1 ; return 0 ; } if (digit_flag==true ){ digit_flag=false ; digit=stod(buffer); operands.push(digit); buffer="" ; } if (canPushOperator(c)){ operators.push(c); } else if (matchBracket(c)){ operators.pop(); } else { op=operators.top(); operators.pop(); if (!getTwoOperands(first,second)){ error("lack of operands" ); return -1 ; } operands.push(basic_operate(op,first,second)); --i; } } if (digit_flag==true ){ digit=stod(buffer); operands.push(digit); digit_flag=false ; buffer="" ; } result=aftermath(); if (Invalid){ return -1 ; } return result; } double calc (const string &str) { string expression=removeBlank(str); cout <<expression<<endl ; double result=evalExpression(expression); return result; } int main () { string text="1+2*3*4+(1+2)*3" ; double result=calc(text); cout <<result; return 0 ; }
递归下降算法
三个函数,分成三层
最高层,将整个表达式看成若干项的和差,维护一个result,从左到右累加各项的和差.每一项都要扔给中间层函数进行计算
中间层,将最高层给的每一项都视为一个积式,维护一个result,从左到右累乘各项的积商.每一项都要扔给底层函数进行计算
最底层,将中间层给的每一项都视为一个数或者一个括号.根据是否有左括号进行区分.对于数就字符串转数然后返回了,对于括号则将括号里的表达式视为一个顶层和差式,扔给最高层
比如expression=1+(2+3)*4-5
最高层evalExpression会将这个表达式拆分成若干term的积
1 2 3 4 5 evalExpression(expression): return evalTerm(term1)+evalTerm(term2)-evalTerm(term3)
然后每个term都会往下扔,甩锅给中层函数evalTerm,其中term2具有代表性
1 2 3 4 evalTerm(term2): return evalFactor(factor1)*evalFactor(factor2)
然后每个factor又会往下扔
1 2 3 4 evalFactor((2 +3 )): return evalExpression(2 +3 ) evalFactor(4 ): return 4
底层工具人儿evalFactor算完了中层工具人儿evalTerm的任务之后,把值交给中层工具人儿
1 2 evalTerm(term2): return 5 *4
中层工具人儿evalTerm算完了之后把值交给顶层人上人儿evalExpression
1 2 evalExpression(expression): return 1 +20 -5
如此整个表达式就求完了
完整程序
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 #include <iostream> #include <algorithm> #include <string> using namespace std ; int Invalid;int pos;string expression;double calc () ;double evalExpression () ;double evalTerm () ;double evalFactor () ;string removeBlank (const string &str) ;bool isEnd () { return expression[pos]=='\0' ; } bool isAdd (const char &c) { return c=='+' ; } bool isMinus (const char &c) { return c=='-' ; } bool isLevel1 (const char &c) { return isAdd(c)||isMinus(c); } bool isMultiply (const char &c) { return c=='*' ; } bool isDivide (const char &c) { return c=='/' ; } bool isLevel2 (const char &c) { return isDivide(c)||isMultiply(c); } bool isLeftBracket (const char &c) { return c=='(' ; } bool isRightBracket (const char &c) { return c==')' ; } bool isDigit (const char &c) { return c>='0' &&c<='9' ; } bool isDot (const char &c) { return c=='.' ; } string removeBlank (const string &str) { string expression; for (auto c:str){ if (c!=' ' ){ expression+=c; } } return expression; } double calc (const string &str) { pos=0 ; Invalid=0 ; expression=removeBlank(str); double result=evalExpression(); return result; } double evalExpression () { double result=0 ; double term; char op; term=evalTerm(); if (Invalid){ return 0 ; } result+=term; while (!isEnd()){ op=expression[pos]; if (!isLevel1(op)){ return result; } ++pos; term=evalTerm(); if (Invalid){ return 0 ; } if (isAdd(op)){ result+=term; } else if (isMinus(op)){ result-=term; } } return result; } double evalTerm () { double result=1 ; double factor; char op; factor=evalFactor(); if (Invalid){ return 0 ; } result*=factor; while (!isEnd()){ op=expression[pos]; if (!isLevel2(op)){ return result; } ++pos; factor=evalFactor(); if (Invalid){ return 0 ; } if (isMultiply(op)){ result*=factor; } else if (isDivide(op)){ result/=factor; } } return result; } double evalFactor () { double result=0 ; string buffer; bool isNegetive=0 ; if (expression[pos]=='-' ){ isNegetive=true ; ++pos; } if (isLeftBracket(expression[pos])){ ++pos; result=evalExpression(); if (!isRightBracket(expression[pos])){ Invalid=1 ; return 0 ; } ++pos; return result; } while (isDigit(expression[pos])){ buffer+=expression[pos]; ++pos; } result=stod(buffer); if (isNegetive){ result*=-1 ; } return result; } string text;double result;int main () { text="1+2*3+(4+5)*6" ; cout <<calc(text)<<endl ; return 0 ; }
项目结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 ┌──(root㉿Executor)-[/mnt/c/Users/86135/Desktop/parser/parser] └─# tree . ├── ds │ ├── Graph.cpp │ ├── Graph.h │ ├── Hash.cpp │ ├── Hash.h │ ├── Tree.cpp │ ├── Tree.h │ ├── Trie.cpp │ └── Trie.h ├── e2t │ ├── e2t.cpp │ └── e2t.h ├── main ├── main.cpp ├── main.exe ├── make.bat ├── make.sh ├── n2d │ ├── DFA.cpp │ ├── DFA.H │ ├── DFA.md │ ├── epsilon.cpp │ ├── n2d.cpp │ └── n2d.h ├── objects │ ├── AC.o │ ├── e2t.o │ ├── Graph.o │ ├── t2n.o │ ├── Tree.o │ └── Trie.o ├── README.md ├── swj.cpp └── t2n ├── t2n.cpp └── t2n.h
采用管道过滤器模式
ds包括二叉树,图等基础数据结构,用作基类
e2t模块构建表达式树,该模块接收一个string类型,输出一个Tree类型
t2n模块构建NFA,该模块接收一个Tree类型,输出一个NFA类型
n2d模块构建DFA(正在此处),该模块接收一个NFA,输出一个DFA
DFA最小化(未完成),该模块接收一个DFA,输出一个最小化DFA
main.cpp是入口点
make.sh和make.bat为编译链接脚本,目前还不会使用makefile
ds
二叉树
Tree.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 #pragma once #include <iostream> struct TreeNode { char op; TreeNode *left, *right; bool isCharacter () ; char getOp () ; TreeNode(char c = '\0' , TreeNode *l = NULL , TreeNode *r = NULL ) ; friend std ::ostream &operator<<(std ::ostream &, const TreeNode &); }; struct Tree { public: TreeNode *root; void inOrder (TreeNode *) ; void postOrder (TreeNode *) ; public: Tree(TreeNode *o=NULL ); void inOrder () ; void postOrder () ; };
Tree.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 #include <iostream> #include "Tree.h" using namespace std;TreeNode::TreeNode (char c, TreeNode *l, TreeNode *r) : op (c), left (l), right (r) {} bool TreeNode::isCharacter () { return left==NULL &&right==NULL ; } char TreeNode::getOp () { return op; } ostream &operator <<(ostream &os, const TreeNode &o) { os << o.op; return os; } void Tree::inOrder (TreeNode *o) { if (o == NULL ) return ; inOrder (o->left); cout << (*o) << " " ; inOrder (o->right); } void Tree::postOrder (TreeNode *o) { if (o == NULL ) return ; postOrder (o->left); postOrder (o->right); cout << (*o) << " " ; } Tree::Tree (TreeNode *o ) : root (o) {} void Tree::inOrder () { inOrder (root); cout << endl; } void Tree::postOrder () { postOrder (root); cout << endl; }
有向图
Graph.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 #pragma once #include <iostream> #include <vector> #define maxn 100000 struct GraphEdge { int from; int to; char character; GraphEdge (int ,int ,char ); void setValue (int ,int ,char ) ; int getTo () ; friend std::ostream &operator <<(std::ostream &,const GraphEdge&); char getCharacter () ; }; struct GraphNode { bool isEnd; std::vector<int > postEdges; void addEdge (const int &) ; GraphNode (); bool end () ; friend std::ostream &operator <<(std::ostream &,const GraphNode&); }; struct Graph { GraphEdge edges[maxn]; GraphNode nodes[maxn]; int cnt_edges; int cnt_nodes; int start; int finish; Graph (); int newEdge (const int &f,const int &t,const char &c) ; int newNode () ; void info () ; };
Graph.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 #include <iostream> #include <vector> #include <algorithm> #include "Graph.h" using namespace std;GraphEdge::GraphEdge (int f=0 ,int t=0 ,char c='\0' ){ from=f; to=t; character=c; } void GraphEdge::setValue (int f=0 ,int t=0 ,char c='\0' ) { from=f; to=t; character=c; } int GraphEdge::getTo () { return to; } char GraphEdge::getCharacter () { return character; } ostream &operator <<(ostream &os,const GraphEdge &edge){ if (edge.character=='\0' ){ os<<edge.from<<"---->" <<edge.to; } else os<<edge.from<<"--\"" <<edge.character<<"\"-->" <<edge.to; return os; } GraphNode::GraphNode (){ isEnd=false ; } void GraphNode::addEdge (const int &e) { postEdges.push_back (e); } bool GraphNode::end () { return isEnd; } ostream &operator <<(ostream &os,const GraphNode &node){ os<<"node" ; return os; } Graph::Graph (){ start=finish=0 ; cnt_edges=cnt_nodes=0 ; } int Graph::newEdge (const int &f,const int &t,const char &c) { ++cnt_edges; edges[cnt_edges].setValue (f,t,c); return cnt_edges; } int Graph::newNode () { ++cnt_nodes; return cnt_nodes; } void Graph::info () { for (int i=1 ;i<=cnt_nodes;++i){ cout<<i<<"((" <<i<<"))" <<endl; } for (int i=1 ;i<=cnt_edges;++i){ cout<<edges[i]<<endl; } }
e2t
expression to tree
给定一个正规式,构造其表达式树
正规式中包含操作数都是单个字母,运算符有:
运算符
符号
优先级
括号
()
最高
闭包
*
次高
连接
.
次低
或
|
最低
都是左结合运算,其中
连接,或,都是二元运算符
星闭包是一元运算符,操作数在左
比如a|(b.c)*|d.e
对应的表达式树就是:
1 D:\XDcompAT>run-compat.bat RE-TREE "a|(bc)*|de" -g
image-20221018113058098
右边这个图,每个节点最多往下分两个叉,因此可以用二叉树实现,只有闭包运算这里是只有一个儿子的,可以作为二叉树节点的左儿子,右儿子为空
e2t.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 #pragma once #include "../ds/Tree.h" bool isOr (const char &) ;bool isJoin (const char &) ;bool isClosure (const char &) ;bool isLeftBracket (const char &) ;bool isRightBracket (const char &) ;bool isEnd () ;char getCurrentOperator () ;bool isAlpha (const char &) ;bool isCharacter (const char &) ;bool isNull (const char &) ;bool isSpace (const char &) ;TreeNode *evalOr () ; TreeNode *evalJoin () ; TreeNode *evalClosure () ; Tree getTree (const std ::string &) ;
e2t.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 #include <iostream> #include <algorithm> #include <string> #include <vector> #include <cmath> #include "../ds/Tree.h" #include "e2t.h" using namespace std ; static string expression;static int pos;static int Invalid;bool isOr (const char &c) { return c == '|' ; } bool isJoin (const char &c) { return c == '.' ; } bool isClosure (const char &c) { return c == '*' ; } bool isLeftBracket (const char &c) { return c == '(' ; } bool isRightBracket (const char &c) { return c == ')' ; } bool isEnd () { return expression[pos] == '\0' ; } char getCurrentOperator () { return expression[pos]; } bool isAlpha (const char &c) { if (c >= 'a' && c <= 'z' ) return true ; if (c >= 'A' && c <= 'Z' ) return true ; return false ; } bool isCharacter (const char &c) { if (isAlpha(c))return true ; return c!='.' &&c!='|' &&c!='(' &&c!=')' &&c!='*' ; } bool isNull (const char &c) { return c=='\0' ; } bool isSpace (const char &c) { return c == ' ' ; } Tree getTree (const string &str) { string exp ; string removeSpace = "" ; for (auto c : str) { if (isSpace(c)) continue ; removeSpace += c; } int length = removeSpace.length(); for (int i = 0 ; i < length; ++i) { if (isAlpha(removeSpace[i]) && i > 0 && isAlpha(removeSpace[i - 1 ])) { exp += '.' ; } exp += removeSpace[i]; } expression = exp ; Invalid = 0 ; pos = 0 ; return Tree(evalOr()); } TreeNode *evalOr () { TreeNode *joinTerm = NULL ; TreeNode *root = NULL ; TreeNode *assist = NULL ; char op = '\0' ; joinTerm = evalJoin(); if (Invalid) { return NULL ; } root = joinTerm; while (!isEnd()) { op = getCurrentOperator(); if (!isOr(op)) { return root; } ++pos; joinTerm = evalJoin(); if (Invalid) { return NULL ; } assist = root; root = new TreeNode(op, assist, joinTerm); } return root; } TreeNode *evalJoin () { TreeNode *clousureTerm = NULL ; TreeNode *root = NULL ; TreeNode *assist = NULL ; char op = '\0' ; clousureTerm = evalClosure(); if (Invalid) { return NULL ; } root = clousureTerm; while (!isEnd()) { op = getCurrentOperator(); if (!isJoin(op)) { return root; } ++pos; clousureTerm = evalClosure(); if (Invalid) { return NULL ; } assist = root; root = new TreeNode(op, assist, clousureTerm); } return root; } TreeNode *evalClosure () { TreeNode *joinTerm; TreeNode *singleTerm; TreeNode *root; TreeNode *assist; char op; op = getCurrentOperator(); if (isLeftBracket(op)) { ++pos; joinTerm = evalOr(); op = getCurrentOperator(); if (!isRightBracket(op)) { Invalid = 1 ; return NULL ; } root = joinTerm; ++pos; } else { singleTerm = new TreeNode(op); ++pos; root = singleTerm; } op = getCurrentOperator(); if (!isClosure(op)) { return root; } ++pos; assist = root; root = new TreeNode(op, assist); return root; }
编译运行
1 2 a b c . * | d e . | a | b . c * | d . e
与教具给出的二叉树图是一样的
t2n
tree to nfa
NFA是一个有向图,可以以Graph为基类
t2n.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 #pragma once #include <vector> #include <iostream> #include <utility> #include "../ds/Graph.h" #include "../ds/Tree.h" typedef std ::pair <int ,int > handle;struct NFA : public Graph{ std ::string toJudge; int tot_length; int start; int finish; NFA(Tree &); handle subNFA (TreeNode *) ; bool backtracing_judge (const std ::string &expression) ; bool backtracing_judge (int current_node,int current_index) ; std ::vector <int > getEpsilon (std ::vector <int > T) ; std ::vector <int > smove (std ::vector <int > S,char character) ; bool no_backtracing_judge (const std ::string &str) ; };
t2n.cpp
主要逻辑,就是一共getNFA递归函数,使用了汤普森算法
该函数接收一个语法树根,根据该根节点中的符号指示,决定如何建立子图,返回当前子图
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 #include <iostream> #include <algorithm> #include <string> #include <vector> #include <cmath> #include <utility> #include <stack> #include <cstring> #include <cstdio> #include <cstdlib> #include "../e2t/e2t.h" #include "t2n.h" using namespace std;NFA::NFA (Tree &tree) { handle p = subNFA (tree.root); start = p.first; finish = p.second; } handle NFA::subNFA (TreeNode *o) { int sta = 0 ; int fin = 0 ; char op = o->getOp (); if (isCharacter (op) || isNull (op)) { sta = newNode (); fin = newNode (); nodes[sta].addEdge (newEdge (sta, fin, op)); } else if (isOr (op)) { sta = newNode (); handle left = subNFA (o->left); handle right = subNFA (o->right); fin = newNode (); nodes[sta].addEdge (newEdge (sta, left.first, '\0' )); nodes[sta].addEdge (newEdge (sta, right.first, '\0' )); nodes[right.second].addEdge (newEdge (right.second, fin, '\0' )); nodes[left.second].addEdge (newEdge (left.second, fin, '\0' )); } else if (isJoin (op)) { handle left = subNFA (o->left); --cnt_nodes; handle right = subNFA (o->right); sta = left.first; fin = right.second; } else if (isClosure (op)) { sta = newNode (); handle sub = subNFA (o->left); fin = newNode (); nodes[sta].addEdge (newEdge (sta, sub.first, '\0' )); nodes[sta].addEdge (newEdge (sta, fin, '\0' )); nodes[sub.second].addEdge (newEdge (sub.second, sub.first, '\0' )); nodes[sub.second].addEdge (newEdge (sub.second, fin, '\0' )); } else { cout << "invalid operator" << endl; } return handle (sta, fin); } bool NFA::backtracing_judge (const string &expression) { toJudge = expression; tot_length = toJudge.length (); return backtracing_judge (start,0 ); } bool NFA::backtracing_judge (int current_node, int current_index) { char c; bool flag; GraphNode &node=nodes[current_node]; if (current_index==tot_length){ if (current_node==finish){ return true ; } for (auto e:node.postEdges){ GraphEdge &edge=edges[e]; c=edge.getCharacter (); if (c=='\0' ){ flag=backtracing_judge (edge.to,current_index); } if (flag)return true ; } return false ; } for (auto e : node.postEdges) { GraphEdge &edge = edges[e]; c = edge.getCharacter (); if (c == '\0' ) { flag = backtracing_judge (edge.to, current_index); } if (flag) return true ; if (c == toJudge[current_index]) { flag = backtracing_judge (edge.to, current_index + 1 ); } if (flag) return true ; } return false ; } std::vector<int > NFA::getEpsilon (std::vector<int > T) { stack<int > s; bool visited[maxn]; std::vector<int > epsilon; memset (visited,false ,sizeof (visited)); int current_index; int next_index; for (auto u:T){ if (visited[u]==false ){ visited[u]=true ; s.push (u); } } while (!s.empty ()){ current_index=s.top (); s.pop (); epsilon.push_back (current_index); for (auto e:nodes[current_index].postEdges){ GraphEdge &edge=edges[e]; if (edge.getCharacter ()=='\0' ){ next_index=edge.getTo (); if (visited[next_index]==false ){ visited[next_index]=true ; s.push (next_index); } } } } return epsilon; } std::vector<int > NFA::smove (std::vector<int > S,char character) { std::vector<int > result; bool visited[maxn]; memset (visited,0 ,sizeof (visited)); int next_index; for (auto current_index:S){ GraphNode &node=nodes[current_index]; for (auto e:node.postEdges){ GraphEdge &edge=edges[e]; next_index=edge.getTo (); if (edge.getCharacter ()==character&&visited[next_index]==false ){ visited[next_index]=true ; result.push_back (next_index); } } } return result; } bool NFA::no_backtracing_judge (const std::string &str) { std::vector<int > S=getEpsilon ({start}); for (auto c:str){ S=getEpsilon (smove (S,c)); } return find (S.begin (),S.end (),finish)!=S.end (); }
验证
为了验证算法正确性,info函数会根据mermaid
graph图的语法打印,结果贴到typora mermaid代码块中即可可视化
输入的表达式是(a|b)*.a.b.b
,
程序的运行结果是
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 a b | * a . b . b . cnt_edges=13 1((1 )) 2((2 )) 3((3 )) 4((4 )) 5((5 )) 6((6 )) 7((7 )) 8((8 )) 9((9 )) 10((10 )) 11((11 )) 3--a-->4 5--b-->6 2---->3 2---->5 4---->7 6---->7 1---->2 1---->8 7---->2 7---->8 8--a-->9 9--b-->10 10--b-->11
把其中的mermaid语法抠出来乎到typora mermaid代码块中得到
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 graph LR 1((1)) 2((2)) 3((3)) 4((4)) 5((5)) 6((6)) 7((7)) 8((8)) 9((9)) 10((10)) 11((11)) 3--a-->4 5--b-->6 2---->3 2---->5 4---->7 6---->7 1---->2 1---->8 7---->2 7---->8 8--a-->9 9--b-->10 10--b-->11
教材上给出的NFA图是
image-20221019120541112
我从1开始给节点编号,教材从0开始给节点编号
显然结构是相同的
n2d
DFA中的每个节点都是NFA的状态集合,但是DFA建立之后就不需要NFA的状态了
只需要维护边上的字符
使用map判定一个状态集合是否已经存在过
DFA.h
1 2 3 4 5 6 7 8 9 10 11 12 13 14 #pragma once #include <vector> #include <string> #include <map> #include "Graph.h" #include "NFA.h" struct DFA : public Graph{ NFA &nfa; DFA(NFA &n); friend std ::ostream &operator<<(std ::ostream &os,DFA &dfa); };
DFA.cpp
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 #include <vector> #include <map> #include <string> #include <queue> #include <vector> #include <utility> #include "DFA.h" #include "NFA.h" typedef std::pair<std::vector<HNODE>,HNODE> Record;DFA::DFA (NFA &n):nfa (n){ newNode (GraphNode ()); std::map<std::vector<HNODE>,HNODE> table; std::queue<std::vector<HNODE> > q; HNODE hNode_current; HNODE hNode_next; std::vector<HNODE> current_statements=nfa.getEpsilon ({nfa.hNode_start}); hNode_current=newNode (GraphNode ()); std::vector<HNODE> next_statments; table.insert (Record (current_statements,hNode_current)); q.push (current_statements); while (!q.empty ()){ current_statements=q.front (); q.pop (); hNode_current=table[current_statements]; for (char c='a' ;c<='z' ;++c){ next_statments=nfa.getEpsilon (nfa.smove (current_statements,c)); if (next_statments.empty ())continue ; if (table.find (next_statments)==table.end ()){ hNode_next=newNode (GraphNode ()); table.insert (Record (next_statments,hNode_next)); q.push (next_statments); } else { hNode_next=table[next_statments]; } nodes[hNode_current].addEdge (newEdge (GraphEdge (hNode_current,hNode_next,c))); } } } std::ostream &operator <<(std::ostream &os,DFA &dfa){ for (HNODE hNode=1 ;hNode<dfa.nodes.size ();hNode++){ os<<hNode<<"((" <<hNode<<"))" <<std::endl; } for (auto edge:dfa.edges){ os<<edge<<std::endl; } return os; }
验证
image-20221110095752738
DFA最小化