词法分析器

词法分析先修-表达式求值

求一个包含加减乘除乘方括号的表达式值,要求考虑运算优先级

比如1+2*3-(4+5)*6

迪杰斯特拉双栈法

双栈,一个用于存放操作数stack<double> operands,一个用于存放运算符stack<char> operaotors

运算优先级通过给每个运算符定义栈内外优先级来体现:

教材给出的优先级定义是:

icp,栈外优先级

isp,栈内优先级

优先级	(	+-	*/	)
栈内优先级isp	1	3	5	8
栈外优先级icp	8	2	4	1

双栈算法

对于一个操作数,直接压入operands栈

对于一个运算符c,

1.如果其栈外优先级比符号栈栈顶运算符的栈内优先级要高,则c入符号栈,即

1
2
3

if(operators.empty()||icp(c)>isp(operators.top())){
    operators.push(c);
}

2.如果其栈外优先级比符号栈栈顶优先级低,则首先要符号栈弹栈,每次弹出一个符号,顺带弹出两个操作数栈的数,根据该符号对这两个操作数进行运算之后将运算结果重新压入操作数栈.如此直到符号栈顶的栈内优先级小于c的栈外优先级时,c压栈

if(!operators.empty()&&icp(c)<isp(operators.top())){
	while(!operators.empty()&&icp(c)<isp(operators.top())){
        op=operators.top();
        operators.pop();
		if(operands.size()>=2){
			first=operands.top();
			operands.pop();
			second=operands.top();
			operands.pop();
            operands.push(basic_operate(op,first,second));
		}
		else error
	}
    if(operators.empty()||icp(c)>isp(operators.top())){
        operators.push(c);
    }
    else{//icp(c)==isp(operators.top())的情况,见3
        operators.pop();
    }
}

3.如果符号栈顶的栈内优先级等于c的栈外优先级,直接符号栈退栈一个,c也一并扔掉.

如果2中发生了符号栈退栈到某时,符号栈顶的栈内优先级等于c的栈外优先级,也是将栈顶和c都扬了

这是因为遇到了括号

1
2
3

if(icp(c)==isp(operators.top())){
	operators.pop()
}

完整代码

#include <iostream>
#include <stack>
#include <algorithm>
#include <string>
#include <vector>
using namespace std;


// string expression;
// int pos;
int Invalid;//失效标志


int icp(const char &c){//栈外优先级
    int priority=0;
    switch(c){
        case '+':case '-':
            priority=2;
            break;
        case '*':case '/':
            priority=4;
            break;        
        case ')':
            priority=1;
            break;
        case '(':
            priority=8;
    }
    return priority;
}
int isp(const char &c){//栈内优先级
    int priority=0;
    switch(c){
        case '+':case '-':
            priority=3;
            break;
        case '*':case '/':
            priority=5;
            break;        
        case ')':
            priority=8;
            break;
        case '(':
            priority=1;
    }
    return priority;
}

stack<char> operators;//操作数栈
stack<double> operands;//运算符栈
double basic_operate(const char &op,const double &first,const double &second){//根据op运算符计算first op second的值
    double result=0;
    switch(op){
        case '+':
            result=first+second;
            break;
        case '-':
            result=first-second;
            break;
        case '*':
            result=first*second;
            break;
        case '/':
            result=first/second;
            break;
    }
    return result;
}

string removeBlank(const string &str){//删除str中的空格
    string exp;
    for(auto c:str){
        if(c!=' '){
            exp+=c;
        }
    }
    return exp;
}


bool isDigit(const char &c){//判断是否为数字
    return c>='0'&&c<='9'||c=='.';
}

bool isValidOperator(const char &c){//c是否为合法运算符
    switch(c){
        case '+':case '-':case '*':case '/': case '(':case ')':
            return true;
    }
    return false;
}


bool canPushOperator(const char &c){//判断是否可以压符号栈
    if(operators.empty()||icp(c)>isp(operators.top())){
        return true;
    }
    return false;
}

bool matchBracket(const char &c){//判断是否匹配括号
    return !operators.empty()&&isp(operators.top())==icp(c);
}

bool getTwoOperands(double &first,double &second){//取得操作数栈顶两个
    if(operands.size()<2){
        Invalid=1;
        return false;
    }
    first=operands.top();
    operands.pop();
    second=operands.top();
    operands.pop();
    return true;
}



double aftermath(){//扫描完毕,处理后事
    double first,second;
    char op;
    while(!operators.empty()){//每次弹一个符号,两个数,计算之后结果再压回操作数栈
        op=operators.top();
        operators.pop();
        if(!getTwoOperands(first,second)){
            Invalid=1;
            return 0;
        }
        operands.push(basic_operate(op,first,second));
    }
    return operands.top();//到最后如果无误,符号栈应该弹空,操作数只剩一个栈顶,就是最终结果

}

void error(const string &err){
    cerr<<"error:"<<err<<endl;
}
double evalExpression(const string &expression){
    string buffer;//操作数转换缓冲区
    double digit;
    bool digit_flag=0;//连续取操作数开关
    double first,second;
    char op;
    double result;
    int length=expression.length();
    char c;
    for(int i=0;i<length;++i){
        c=expression[i];
        if(isDigit(c)){
            if(digit_flag==false){
                digit_flag=true;
                buffer=c;
            }
            else buffer+=c;
            cout<<"actived"<<endl;
            continue;
        }
        else if(!isValidOperator(c)){
            error("inValidOperator");
            Invalid=1;
            return 0;
        }
        if(digit_flag==true){
            digit_flag=false;
            digit=stod(buffer);
            operands.push(digit);
            buffer="";
        }
        if(canPushOperator(c)){
            operators.push(c);
        }
        else if(matchBracket(c)){
            operators.pop();
        }
        else{
            op=operators.top();
            operators.pop();
            if(!getTwoOperands(first,second)){
                error("lack of operands");
                return -1;
            }
            operands.push(basic_operate(op,first,second));
            --i;//回溯,c重新与符号栈顶进行比较
        }


    }
    //扫描完毕,但是如果最后是一个操作数,它此时还留在buffer缓冲区中,需要压栈
    if(digit_flag==true){
        digit=stod(buffer);
        operands.push(digit);
        digit_flag=false;
        buffer="";
    }
    
    result=aftermath();//处理后事,重要
    if(Invalid){
        return -1;
    }
    return result;
}

double calc(const string &str){
    string expression=removeBlank(str);
    cout<<expression<<endl;
    double result=evalExpression(expression);
    return result;
}


int main(){
    string text="1+2*3*4+(1+2)*3";
    double result=calc(text);
    cout<<result;
    return 0;
}

递归下降算法

三个函数,分成三层

最高层,将整个表达式看成若干项的和差,维护一个result,从左到右累加各项的和差.每一项都要扔给中间层函数进行计算

中间层,将最高层给的每一项都视为一个积式,维护一个result,从左到右累乘各项的积商.每一项都要扔给底层函数进行计算

最底层,将中间层给的每一项都视为一个数或者一个括号.根据是否有左括号进行区分.对于数就字符串转数然后返回了,对于括号则将括号里的表达式视为一个顶层和差式,扔给最高层

比如expression=1+(2+3)*4-5

最高层evalExpression会将这个表达式拆分成若干term的积
1
2
3
4
5
evalExpression(expression):
	return evalTerm(term1)+evalTerm(term2)-evalTerm(term3)
    	//term1=1
    	//term2=(2+3)*4
    	//term3=5
然后每个term都会往下扔,甩锅给中层函数evalTerm,其中term2具有代表性
1
2
3
4
evalTerm(term2):
	return evalFactor(factor1)*evalFactor(factor2)
	//factor1=(2+3)
	//factor2=4
然后每个factor又会往下扔
1
2
3
4
evalFactor((2+3)):
	return evalExpression(2+3)//我的天呐,底层工具人儿也有指使顶层人上人儿的一天?
evalFactor(4):
	return 4
底层工具人儿evalFactor算完了中层工具人儿evalTerm的任务之后,把值交给中层工具人儿
1
2
evalTerm(term2):
	return 5*4
中层工具人儿evalTerm算完了之后把值交给顶层人上人儿evalExpression
1
2
evalExpression(expression):
	return 1+20-5
如此整个表达式就求完了

完整程序

#include <iostream>
#include <algorithm>
#include <string>
using namespace std;
int Invalid;//模块全局变量,失效标记,进行新的计算之前首先应该检查是否失效,失效则可以偷懒了
int pos;//expression[pos],当前扫描位置,实时后移
string expression;//表达式

///递归下降求解表达式

double calc();//计算器
double evalExpression();//将str视为若干项的和差
double evalTerm();//将str视为若干项的积商
double evalFactor();//将str视为单个项或者一个括号表达式
string removeBlank(const string &str);//去除输入表达式中的空格

bool isEnd(){//检查是否扫描完毕
    return expression[pos]=='\0';
}

bool isAdd(const char &c){
    return c=='+';
}
bool isMinus(const char &c){
    return c=='-';
}
bool isLevel1(const char &c){//是否是顶层人上人儿的工作
    return isAdd(c)||isMinus(c);
}


bool isMultiply(const char &c){
    return c=='*';
}
bool isDivide(const char &c){
    return c=='/';
}
bool isLevel2(const char &c){//是否是中层工具人儿的工作
    return isDivide(c)||isMultiply(c);
}

bool isLeftBracket(const char &c){
    return c=='(';
}
bool isRightBracket(const char &c){
    return c==')';
}
bool isDigit(const char &c){
    return c>='0'&&c<='9';
}
bool isDot(const char &c){
    return c=='.';
}


string removeBlank(const string &str){//去除str中的空格
    string expression;
    for(auto c:str){
        if(c!=' '){//排除空格
            expression+=c;
        }
    }
    return expression;
}

double calc(const string &str){
    pos=0;//复位
    Invalid=0;
    expression=removeBlank(str);
    double result=evalExpression();
    return result;
}
double evalExpression(){//顶层人上人儿
    double result=0;//累加和
    double term;//term表示一个"加数"
    char op;//存放一个运算符
    
    term=evalTerm();//最左边应该是一个值,不应该是一个运算符(负号也算是数的一部分)
    if(Invalid){//检查evalTerm函数是否报错
        return 0;
    }
    result+=term;//如果没有报错就可以累加了
    while(!isEnd()){//从左向右扫描,最宽泛的停止范围就是扫描到头了
        op=expression[pos];

        if(!isLevel1(op)){//如果将各项都视为因数之后,还是遇到了非顶层运算符,比如括号,就应该返回结果了
            return result;
        }
        ++pos;//执行到此说明op是顶层的运算符,即加或者减法
        term=evalTerm();//pos后移越过了op,下一个理论上就应该是一个和式
        if(Invalid){//检查evalTerm是否有效
            return 0;
        }
        if(isAdd(op)){//根据op是加或者减决定刚算出的term如何累计到result上
            result+=term;
        }
        else if(isMinus(op)){
            result-=term;
        }

    }
    return result;
}
double evalTerm(){//中层工具人儿,逻辑与evalExpression几乎相同
    double result=1;
    double factor;
    char op;

    factor=evalFactor();
    if(Invalid){
        return 0;
    }
    result*=factor;
    while(!isEnd()){
        op=expression[pos];
        if(!isLevel2(op)){
            return result;
        }
        ++pos;
        factor=evalFactor();
        if(Invalid){
            return 0;
        }
        if(isMultiply(op)){
            result*=factor;
        }
        else if(isDivide(op)){
            result/=factor;
        }
    }
    return result;
}
double evalFactor(){//底层工具人儿
    double result=0;
    string buffer;
    bool isNegetive=0;//负数标志

    if(expression[pos]=='-'){//首先检查是不是负数,
        isNegetive=true;//命中负号,isNegetive支起来
        ++pos;//
    }
    if(isLeftBracket(expression[pos])){//遭遇括号,括号里面的视为表达式扔给顶层
        ++pos;
        result=evalExpression();
        if(!isRightBracket(expression[pos])){//顶层很自觉,只会处理加减式子,遇到右括号就会甩锅给中层,
        //中层也处理不了右括号,甩锅给下层,而下层正好在这里等着右括号呢
        //如果底层命中右括号则没有错误,否则就说明从顶层到中层到底层下来的这个符号是个外星人,不熟,发生错误
            Invalid=1;
            return 0;
        }
        ++pos;//到此说明右括号命中,pos++越过了右括号
        return result;//必须立刻返回,因为底层的任务就是计算一个值,括号里的表达式的值算完了就应该返回了
    }
    //执行到此说明不是括号表达式,是纯数,纯数只需要从字符串提取成整数然后上交
    while(isDigit(expression[pos])){
        buffer+=expression[pos];
        ++pos;
    }
    
    result=stod(buffer);
    if(isNegetive){//一开始的时候激活了负数标记,此时就应该给result上负号
        result*=-1;
    }
    return result;
}


string text;
double result;
int main(){
    text="1+2*3+(4+5)*6";
    cout<<calc(text)<<endl;
    return 0;
}

项目结构

┌──(root㉿Executor)-[/mnt/c/Users/86135/Desktop/parser/parser]
└─# tree
.
├── ds
│   ├── Graph.cpp
│   ├── Graph.h
│   ├── Hash.cpp
│   ├── Hash.h
│   ├── Tree.cpp
│   ├── Tree.h
│   ├── Trie.cpp
│   └── Trie.h
├── e2t
│   ├── e2t.cpp
│   └── e2t.h
├── main
├── main.cpp
├── main.exe
├── make.bat
├── make.sh
├── n2d
│   ├── DFA.cpp
│   ├── DFA.H
│   ├── DFA.md
│   ├── epsilon.cpp
│   ├── n2d.cpp
│   └── n2d.h
├── objects
│   ├── AC.o
│   ├── e2t.o
│   ├── Graph.o
│   ├── t2n.o
│   ├── Tree.o
│   └── Trie.o
├── README.md
├── swj.cpp
└── t2n
    ├── t2n.cpp
    └── t2n.h

采用管道过滤器模式

ds包括二叉树,图等基础数据结构,用作基类

e2t模块构建表达式树,该模块接收一个string类型,输出一个Tree类型

t2n模块构建NFA,该模块接收一个Tree类型,输出一个NFA类型

n2d模块构建DFA(正在此处),该模块接收一个NFA,输出一个DFA

DFA最小化(未完成),该模块接收一个DFA,输出一个最小化DFA

main.cpp是入口点

make.sh和make.bat为编译链接脚本,目前还不会使用makefile

ds

二叉树

Tree.h

#pragma once
#include <iostream>
// #include <string>
struct TreeNode
{
    char op;
    TreeNode *left, *right;
    bool isCharacter();
    char getOp();
    // TreeNode(char );
    TreeNode(char c = '\0', TreeNode *l = NULL, TreeNode *r = NULL) ;
    // TreeNode(char ,TreeNode *,TreeNode *);
    friend std::ostream &operator<<(std::ostream &, const TreeNode &);
};

struct Tree
{
public:
    TreeNode *root;
    void inOrder(TreeNode *);
    void postOrder(TreeNode *);

public:
    Tree(TreeNode *o=NULL);
    void inOrder();
    void postOrder();
};

Tree.cpp

#include <iostream>
#include "Tree.h"
using namespace std;
TreeNode::TreeNode(char c, TreeNode *l, TreeNode *r) : op(c), left(l), right(r) {}
bool TreeNode::isCharacter(){
    return left==NULL&&right==NULL;
}
char TreeNode::getOp(){
    return op;
}

ostream &operator<<(ostream &os, const TreeNode &o)
{
    os << o.op;
    return os;
}

void Tree::inOrder(TreeNode *o)
{
    if (o == NULL)
        return;
    inOrder(o->left);
    cout << (*o) << " ";
    inOrder(o->right);
}
void Tree::postOrder(TreeNode *o)
{
    if (o == NULL)
        return;
    postOrder(o->left);
    postOrder(o->right);
    cout << (*o) << " ";
}

Tree::Tree(TreeNode *o ) : root(o) {}
void Tree::inOrder()
{
    inOrder(root);
    cout << endl;
}

void Tree::postOrder()
{
    postOrder(root);
    cout << endl;
}

有向图

Graph.h

#pragma once
#include <iostream>
#include <vector>
#define maxn 100000
struct GraphEdge{
    int from;
    int to;
    char character;
    GraphEdge(int,int,char);
    void setValue(int,int,char);
    int getTo();
    friend std::ostream &operator<<(std::ostream &,const GraphEdge&);
    char getCharacter();
};

struct GraphNode{
    // int id;
    bool isEnd;
    std::vector<int> postEdges;
    void addEdge(const int &);
    GraphNode();
    bool end();
    friend std::ostream &operator<<(std::ostream &,const GraphNode&);
};

struct Graph{
    GraphEdge edges[maxn];
    GraphNode nodes[maxn];
    int cnt_edges;
    int cnt_nodes;
    // std::vector<GraphEdge> edges;
    // std::vector<GraphNode> nodes;

    int start;
    int finish;
    Graph();

    int newEdge(const int &f,const int &t,const char &c);
    int newNode();
    void info();
};

Graph.cpp

#include <iostream>
#include <vector>
#include <algorithm>
#include "Graph.h"
using namespace std;
/// @brief GraphEdge构造函数
/// @param f from,起点
/// @param t to,终点
/// @param c character,本边上的字符
GraphEdge::GraphEdge(int f=0,int t=0,char c='\0'){
    from=f;
    to=t;
    character=c;
}


void GraphEdge::setValue(int f=0,int t=0,char c='\0'){
    from=f;
    to=t;
    character=c;
}


int GraphEdge::getTo(){
    return to;
}
char GraphEdge::getCharacter(){
    return character;
}
ostream &operator<<(ostream &os,const GraphEdge &edge){
    if(edge.character=='\0'){
        os<<edge.from<<"---->"<<edge.to;
    }
    else 
        os<<edge.from<<"--\""<<edge.character<<"\"-->"<<edge.to;
    return os;
}

GraphNode::GraphNode(){ 
    isEnd=false;
}

void GraphNode::addEdge(const int &e){
    
    postEdges.push_back(e);
}
bool GraphNode::end(){
    return isEnd;
}



ostream &operator<<(ostream &os,const GraphNode &node){//stub
    os<<"node";
    return os;
}

Graph::Graph(){
    start=finish=0;
    cnt_edges=cnt_nodes=0;
}
int Graph::newEdge(const int &f,const int &t,const char &c){
    ++cnt_edges;
    edges[cnt_edges].setValue(f,t,c);
    return cnt_edges;
}
int Graph::newNode(){
    ++cnt_nodes;
    return cnt_nodes;
}


void Graph::info(){
    for(int i=1;i<=cnt_nodes;++i){
        cout<<i<<"(("<<i<<"))"<<endl;
    }
    for(int i=1;i<=cnt_edges;++i){
        cout<<edges[i]<<endl;
    }
}

e2t

expression to tree

给定一个正规式,构造其表达式树

正规式中包含操作数都是单个字母,运算符有:

运算符	符号	优先级
括号	`()`	最高
闭包	`*`	次高
连接	`.`	次低
或	`\|`	最低

都是左结合运算,其中

连接,或,都是二元运算符

星闭包是一元运算符,操作数在左

比如a|(b.c)*|d.e对应的表达式树就是:

1	D:\XDcompAT>run-compat.bat RE-TREE "a\|(bc)*\|de" -g

右边这个图,每个节点最多往下分两个叉,因此可以用二叉树实现,只有闭包运算这里是只有一个儿子的,可以作为二叉树节点的左儿子,右儿子为空

e2t.h

#pragma once
#include "../ds/Tree.h"
//以下函数都是测试字符性质
bool isOr(const char &);
bool isJoin(const char &);
bool isClosure(const char &);
bool isLeftBracket(const char &);
bool isRightBracket(const char &);
bool isEnd();
char getCurrentOperator();
bool isAlpha(const char &);
bool isCharacter(const char &);
bool isNull(const char &);
bool isSpace(const char &);
TreeNode *evalOr();
TreeNode *evalJoin();
TreeNode *evalClosure();
Tree getTree(const std::string &);

e2t.cpp

#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <cmath>
#include "../ds/Tree.h"
#include "e2t.h"
using namespace std;
//模块全局变量,用于多个函数之间通信
static string expression;
static int pos;
static int Invalid;
// TreeNode和Tree为二叉树数据结构

//以下函数都是测试字符性质
bool isOr(const char &c)
{
    return c == '|';
}
bool isJoin(const char &c)
{
    return c == '.';
}
bool isClosure(const char &c)
{
    return c == '*';
}
bool isLeftBracket(const char &c)
{
    return c == '(';
}
bool isRightBracket(const char &c)
{
    return c == ')';
}
bool isEnd()
{
    return expression[pos] == '\0';
}
char getCurrentOperator()
{
    return expression[pos];
}
bool isAlpha(const char &c)
{
    if (c >= 'a' && c <= 'z')
        return true;
    if (c >= 'A' && c <= 'Z')
        return true;
    return false;
}
bool isCharacter(const char &c){
    if(isAlpha(c))return true;
    return c!='.'&&c!='|'&&c!='('&&c!=')'&&c!='*';
}
bool isNull(const char &c){
    return c=='\0';
}
bool isSpace(const char &c)
{
    return c == ' ';
}

Tree getTree(const string &str) //传递一个str表达式,构建一个语法树并返回相应二叉树实例
{                               //消除空格,添加点号
    string exp;
    string removeSpace = "";
    for (auto c : str)
    {
        if (isSpace(c))
            continue;
        removeSpace += c;
    }
    int length = removeSpace.length();
    for (int i = 0; i < length; ++i)
    {
        if (isAlpha(removeSpace[i]) && i > 0 && isAlpha(removeSpace[i - 1]))
        {
            exp += '.';
        }
        exp += removeSpace[i];
    }
    expression = exp;
    Invalid = 0;
    pos = 0;
    return Tree(evalOr());
}

TreeNode *evalOr() //顶层函数,将表达式视为若干项目的或
{
    TreeNode *joinTerm = NULL;
    TreeNode *root = NULL;
    TreeNode *assist = NULL;
    char op = '\0';

    joinTerm = evalJoin();
    if (Invalid)
    {
        return NULL;
    }
    root = joinTerm;
    while (!isEnd())
    {
        op = getCurrentOperator();
        if (!isOr(op))
        {
            return root;
        }
        ++pos;
        joinTerm = evalJoin();
        if (Invalid)
        {
            return NULL;
        }
        assist = root;
        root = new TreeNode(op, assist, joinTerm);
    }
    return root;
}

TreeNode *evalJoin() //二层函数,将局部表达式视为若干项目的连接
{
    TreeNode *clousureTerm = NULL;
    TreeNode *root = NULL;
    TreeNode *assist = NULL;
    char op = '\0';

    clousureTerm = evalClosure();
    if (Invalid)
    {
        return NULL;
    }
    root = clousureTerm;
    while (!isEnd())
    {
        op = getCurrentOperator();
        if (!isJoin(op))
        {
            return root;
        }
        ++pos;
        clousureTerm = evalClosure();
        if (Invalid)
        {
            return NULL;
        }
        assist = root;
        root = new TreeNode(op, assist, clousureTerm);
    }
    return root;
}

TreeNode *evalClosure() //底层函数,将局部表达式视为括号或者树,并且判断是否有星闭包
{
    TreeNode *joinTerm;
    TreeNode *singleTerm;
    TreeNode *root;
    TreeNode *assist;
    char op;

    op = getCurrentOperator();
    if (isLeftBracket(op))
    {
        ++pos;
        joinTerm = evalOr();
        op = getCurrentOperator();
        if (!isRightBracket(op))
        {
            Invalid = 1;
            return NULL;
        }
        root = joinTerm;
        ++pos;
    }
    else
    {
        singleTerm = new TreeNode(op);
        ++pos;
        root = singleTerm;
    }
    op = getCurrentOperator();
    if (!isClosure(op))
    {
        return root;
    }
    ++pos; //越过闭包星号
    assist = root;
    root = new TreeNode(op, assist);
    return root;
}



//测试存根
// int main()
// {
//     string exp = "a  |(b.  c)*|d e";//测试正规式子
//     Tree tree = getTree(exp);//
//     tree.postOrder();
//     tree.inOrder();

//     return 0;
// }

编译运行

1 2	a b c . * \| d e . \| #后根遍历 a \| b . c * \| d . e #中根遍历

与教具给出的二叉树图是一样的

t2n

tree to nfa

NFA是一个有向图,可以以Graph为基类

t2n.h

#pragma once
#include <vector>
#include <iostream>
#include <utility>
#include "../ds/Graph.h"
#include "../ds/Tree.h"
typedef std::pair<int,int> handle;
struct NFA:public Graph{
    std::string toJudge;
    int tot_length;
    int start;
    int finish;

    NFA(Tree &);
    handle subNFA(TreeNode *);
    bool backtracing_judge(const std::string &expression);
    bool backtracing_judge(int current_node,int current_index);
    std::vector<int> getEpsilon(std::vector<int> T);
    std::vector<int> smove(std::vector<int> S,char character);
    bool no_backtracing_judge(const std::string &str);
};

t2n.cpp

主要逻辑,就是一共getNFA递归函数,使用了汤普森算法

该函数接收一个语法树根,根据该根节点中的符号指示,决定如何建立子图,返回当前子图

#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <cmath>
#include <utility>
#include <stack>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include "../e2t/e2t.h"
#include "t2n.h"
using namespace std;

// #define start(handle) (handle.first)

NFA::NFA(Tree &tree)
{
    handle p = subNFA(tree.root);
    start = p.first;
    finish = p.second;
}
handle NFA::subNFA(TreeNode *o)
{
    int sta = 0;
    int fin = 0;
    char op = o->getOp();
    if (isCharacter(op) || isNull(op))
    {
        sta = newNode();
        fin = newNode();
        nodes[sta].addEdge(newEdge(sta, fin, op));
    }
    else if (isOr(op))
    {
        sta = newNode();
        handle left = subNFA(o->left);
        handle right = subNFA(o->right);
        fin = newNode();
        nodes[sta].addEdge(newEdge(sta, left.first, '\0'));
        nodes[sta].addEdge(newEdge(sta, right.first, '\0'));
        nodes[right.second].addEdge(newEdge(right.second, fin, '\0'));
        nodes[left.second].addEdge(newEdge(left.second, fin, '\0'));
    }
    else if (isJoin(op))
    {
        handle left = subNFA(o->left);
        --cnt_nodes;
        handle right = subNFA(o->right);
        sta = left.first;
        fin = right.second;
        // nodes[left.second].addEdge(newEdge(left.second,right.first,'\0'));
    }
    else if (isClosure(op))
    {
        sta = newNode();
        handle sub = subNFA(o->left);
        fin = newNode();
        nodes[sta].addEdge(newEdge(sta, sub.first, '\0'));
        nodes[sta].addEdge(newEdge(sta, fin, '\0'));
        nodes[sub.second].addEdge(newEdge(sub.second, sub.first, '\0'));
        nodes[sub.second].addEdge(newEdge(sub.second, fin, '\0'));
    }
    else
    {
        cout << "invalid operator" << endl;
    }
    return handle(sta, fin);
}

bool NFA::backtracing_judge(const string &expression)
{
    toJudge = expression;
    tot_length = toJudge.length();
    return backtracing_judge(start,0);
}
bool NFA::backtracing_judge(int current_node, int current_index)
{   
    char c;
    bool flag;
    GraphNode &node=nodes[current_node];

    if(current_index==tot_length){
        if(current_node==finish){
            return true;
        }
        for(auto e:node.postEdges){
            GraphEdge &edge=edges[e];
            c=edge.getCharacter();
            if(c=='\0'){
                flag=backtracing_judge(edge.to,current_index);
            }
            if(flag)return true;
        }
        return false;
    }

    for (auto e : node.postEdges)
    {
        GraphEdge &edge = edges[e];
        c = edge.getCharacter();
        if (c == '\0')
        {
            flag = backtracing_judge(edge.to, current_index);
        }
        if (flag)
            return true;
        if (c == toJudge[current_index])
        {
            flag = backtracing_judge(edge.to, current_index + 1);
        }
        if (flag)
            return true;
    }
    return false;
}


std::vector<int> NFA::getEpsilon(std::vector<int> T){
    stack<int> s;
    bool visited[maxn];
    std::vector<int> epsilon;
    memset(visited,false,sizeof(visited));
    int current_index;
    int next_index;
    for(auto u:T){
        if(visited[u]==false){
            visited[u]=true;
            s.push(u);
        }
    }
    while(!s.empty()){
        current_index=s.top();
        s.pop();
        epsilon.push_back(current_index);
        for(auto e:nodes[current_index].postEdges){
            GraphEdge &edge=edges[e];
            if(edge.getCharacter()=='\0'){
                next_index=edge.getTo();
                if(visited[next_index]==false){
                    visited[next_index]=true;
                    s.push(next_index);
                }
            }
        }
    }
    return epsilon;

}

//从S集合开始,经过character字符能够转移到的状态集合
std::vector<int> NFA::smove(std::vector<int> S,char character){
    std::vector<int> result;
    bool visited[maxn];
    memset(visited,0,sizeof(visited));
    int next_index;
    for(auto current_index:S){
        GraphNode &node=nodes[current_index];
        for(auto e:node.postEdges){
            GraphEdge &edge=edges[e];
            next_index=edge.getTo();
            if(edge.getCharacter()==character&&visited[next_index]==false){
                visited[next_index]=true;
                result.push_back(next_index);
            }
        }
    }


    return result;
}

bool NFA::no_backtracing_judge(const std::string &str){
    std::vector<int> S=getEpsilon({start});
    for(auto c:str){
        S=getEpsilon(smove(S,c));
    }
    return find(S.begin(),S.end(),finish)!=S.end();
}

验证

为了验证算法正确性,info函数会根据mermaid graph图的语法打印,结果贴到typora mermaid代码块中即可可视化

输入的表达式是(a|b)*.a.b.b,

程序的运行结果是

a b | * a . b . b . 
cnt_edges=13
1((1))
2((2))
3((3))
4((4))
5((5))
6((6))
7((7))
8((8))
9((9))
10((10))
11((11))

3--a-->4
5--b-->6
2---->3
2---->5
4---->7
6---->7
1---->2
1---->8
7---->2
7---->8
8--a-->9
9--b-->10
10--b-->11

把其中的mermaid语法抠出来乎到typora mermaid代码块中得到

graph LR
    1((1))
    2((2))
    3((3))
    4((4))
    5((5))
    6((6))
    7((7))
    8((8))
    9((9))
    10((10))
    11((11))

    3--a-->4
    5--b-->6
    2---->3
    2---->5
    4---->7
    6---->7
    1---->2
    1---->8
    7---->2
    7---->8
    8--a-->9
    9--b-->10
    10--b-->11

教材上给出的NFA图是

我从1开始给节点编号,教材从0开始给节点编号

显然结构是相同的

n2d

DFA中的每个节点都是NFA的状态集合,但是DFA建立之后就不需要NFA的状态了

只需要维护边上的字符

使用map判定一个状态集合是否已经存在过

DFA.h

#pragma once
#include <vector>
#include <string>
#include <map>
#include "Graph.h"
#include "NFA.h"

struct DFA:public Graph{
    NFA &nfa;
    
    DFA(NFA &n);

    friend std::ostream &operator<<(std::ostream &os,DFA &dfa);
};

DFA.cpp

#include <vector>
#include <map>
#include <string>
#include <queue>
#include <vector>
#include <utility>
#include "DFA.h"
#include "NFA.h"

typedef std::pair<std::vector<HNODE>,HNODE> Record;
DFA::DFA(NFA &n):nfa(n){
    newNode(GraphNode());
    std::map<std::vector<HNODE>,HNODE> table;
    std::queue<std::vector<HNODE> > q;
    HNODE hNode_current;
    HNODE hNode_next;

    std::vector<HNODE> current_statements=nfa.getEpsilon({nfa.hNode_start});
    hNode_current=newNode(GraphNode());
    std::vector<HNODE> next_statments;
    table.insert(Record(current_statements,hNode_current));
    q.push(current_statements);
    while(!q.empty()){
        current_statements=q.front();
        q.pop();
        hNode_current=table[current_statements];
        
        for(char c='a';c<='z';++c){
            next_statments=nfa.getEpsilon(nfa.smove(current_statements,c));
            if(next_statments.empty())continue;
            if(table.find(next_statments)==table.end()){
                hNode_next=newNode(GraphNode());
                table.insert(Record(next_statments,hNode_next));
                q.push(next_statments);
            }
            else{
                hNode_next=table[next_statments];
            }
            nodes[hNode_current].addEdge(newEdge(GraphEdge(hNode_current,hNode_next,c)));
        }
    }
}



std::ostream &operator<<(std::ostream &os,DFA &dfa){
    for(HNODE hNode=1;hNode<dfa.nodes.size();hNode++){
        os<<hNode<<"(("<<hNode<<"))"<<std::endl;
    }
    for(auto edge:dfa.edges){
        os<<edge<<std::endl;
    }
    return os;
}

验证