## thompsons construction algorithm

### Data

in c++

#### Tags

automata theory, regular expressions

### Source Code

``````# THOMPSON'S CONSTRUCTION ALGORITHM #

This is an implementation of the Thompson's Construction Algorithm for converting a regular expression into its corresponding Non-Deterministic Finite Automaton.

For more such C++ implementations of Language and Grammar related algorithms, check out the repository -
#### github.com/AlexMathew/Automata-Generation ####``````
``````#include<iostream>
#include<vector>
#include<string>
#include<set>
#include<stack>

using namespace std;

struct trans {
int vertex_from;
int vertex_to;
char trans_symbol;
};

class NFA {
public:
vector<int> vertex;
vector<trans> transitions;
int final_state;

NFA() {

}

int get_vertex_count() {
return vertex.size();
}

void set_vertex(int no_vertex) {
for(int i = 0; i < no_vertex; i++) {
vertex.push_back(i);
}
}

void set_transition(int vertex_from, int vertex_to, char trans_symbol) {
trans new_trans;
new_trans.vertex_from = vertex_from;
new_trans.vertex_to = vertex_to;
new_trans.trans_symbol = trans_symbol;
transitions.push_back(new_trans);
}

void set_final_state(int fs) {
final_state = fs;
}

int get_final_state() {
return final_state;
}

void display() {
trans new_trans;
cout<<"\n";
for(int i = 0; i < transitions.size(); i++) {
new_trans = transitions.at(i);
cout<<"q"<<new_trans.vertex_from<<" --> q"<<new_trans.vertex_to<<" : Symbol - "<<new_trans.trans_symbol<<endl;
}
cout<<"\nThe final state is q"<<get_final_state()<<endl;
}
};

NFA concat(NFA a, NFA b) {
NFA result;
result.set_vertex(a.get_vertex_count() + b.get_vertex_count());
int i;
trans new_trans;

for(i = 0; i < a.transitions.size(); i++) {
new_trans = a.transitions.at(i);
result.set_transition(new_trans.vertex_from, new_trans.vertex_to, new_trans.trans_symbol);
}

result.set_transition(a.get_final_state(), a.get_vertex_count(), '^');

for(i = 0; i < b.transitions.size(); i++) {
new_trans = b.transitions.at(i);
result.set_transition(new_trans.vertex_from + a.get_vertex_count(), new_trans.vertex_to + a.get_vertex_count(), new_trans.trans_symbol);
}

result.set_final_state(a.get_vertex_count() + b.get_vertex_count() - 1);

return result;
}

NFA kleene(NFA a) {
NFA result;
int i;
trans new_trans;

result.set_vertex(a.get_vertex_count() + 2);

result.set_transition(0, 1, '^');

for(i = 0; i < a.transitions.size(); i++) {
new_trans = a.transitions.at(i);
result.set_transition(new_trans.vertex_from + 1, new_trans.vertex_to + 1, new_trans.trans_symbol);
}

result.set_transition(a.get_vertex_count(), a.get_vertex_count() + 1, '^');
result.set_transition(a.get_vertex_count(), 1, '^');
result.set_transition(0, a.get_vertex_count() + 1, '^');

result.set_final_state(a.get_vertex_count() + 1);

return result;
}

NFA or_selection(vector<NFA> selections, int no_of_selections) {
NFA result;
int vertex_count = 2;
int i, j;
NFA med;
trans new_trans;

for(i = 0; i < no_of_selections; i++) {
vertex_count += selections.at(i).get_vertex_count();
}

result.set_vertex(vertex_count);

for(i = 0; i < no_of_selections; i++) {
med = selections.at(i);
for(j = 0; j < med.transitions.size(); j++) {
new_trans = med.transitions.at(j);
}

result.set_transition(adder_track - 1, vertex_count - 1, '^');
}

result.set_final_state(vertex_count - 1);

return result;
}

NFA re_to_nfa(string re) {
stack<char> operators;
stack<NFA> operands;
char op_sym;
int op_count;
char cur_sym;
NFA *new_sym;

for(string::iterator it = re.begin(); it != re.end(); ++it) {
cur_sym = *it;
if(cur_sym != '(' && cur_sym != ')' && cur_sym != '*' && cur_sym != '|' && cur_sym != '.') {
new_sym = new NFA();
new_sym->set_vertex(2);
new_sym->set_transition(0, 1, cur_sym);
new_sym->set_final_state(1);
operands.push(*new_sym);
delete new_sym;
} else {
if(cur_sym == '*') {
NFA star_sym = operands.top();
operands.pop();
operands.push(kleene(star_sym));
} else if(cur_sym == '.') {
operators.push(cur_sym);
} else if(cur_sym == '|') {
operators.push(cur_sym);
} else if(cur_sym == '(') {
operators.push(cur_sym);
} else {
op_count = 0;
char c;
op_sym = operators.top();
if(op_sym == '(') continue;
do {
operators.pop();
op_count++;
} while(operators.top() != '(');
operators.pop();
NFA op1;
NFA op2;
vector<NFA> selections;
if(op_sym == '.') {
for(int i = 0; i < op_count; i++) {
op2 = operands.top();
operands.pop();
op1 = operands.top();
operands.pop();
operands.push(concat(op1, op2));
}
} else if(op_sym == '|'){
selections.assign(op_count + 1, NFA());
int tracker = op_count;
for(int i = 0; i < op_count + 1; i++) {
selections.at(tracker) = operands.top();
tracker--;
operands.pop();
}
operands.push(or_selection(selections, op_count+1));
} else {

}
}
}
}

return operands.top();
}

int main() {
cout<<"\n\nThe Thompson's Construction Algorithm takes a regular expression as an input "
<<"and returns its corresponding Non-Deterministic Finite Automaton \n\n";
cout<<"\n\nThe basic building blocks for constructing the NFA are : \n";

NFA a, b;

cout<<"\nFor the regular expression segment : (a)";
a.set_vertex(2);
a.set_transition(0, 1, 'a');
a.set_final_state(1);
a.display();
//	getch();

cout<<"\nFor the regular expression segment : (b)";
b.set_vertex(2);
b.set_transition(0, 1, 'b');
b.set_final_state(1);
b.display();
//	getch();

cout<<"\nFor the regular expression segment [Concatenation] : (a.b)";
NFA ab = concat(a, b);
ab.display();
//	getch();

cout<<"\nFor the regular expression segment [Kleene Closure] : (a*)";
NFA a_star = kleene(a);
a_star.display();
//	getch();

cout<<"\nFor the regular expression segment [Or] : (a|b)";
int no_of_selections;
no_of_selections = 2;
vector<NFA> selections(no_of_selections, NFA());
selections.at(0) = a;
selections.at(1) = b;
NFA a_or_b = or_selection(selections, no_of_selections);
a_or_b.display();
//	getch();

string re;
set<char> symbols;

cout<<"\n*****\t*****\t*****\n";
cout<<"\nFORMAT : \n"
<<"> Explicitly mention concatenation with a '.' operator \n"
<<"> Enclose every concatenation and or section by parantheses \n"
<<"> Enclose the entire regular expression with parantheses \n\n";

cout<<"For example : \nFor the regular expression (a.(b|c))  -- \n";
NFA example_nfa = re_to_nfa("(a.(b|c))");
example_nfa.display();

cout<<"\n\nEnter the regular expression in the above mentioned format - \n\n";
cin>>re;

/*	char cur_sym;
int counter = 0;
for(string::iterator it = re.begin(); it != re.end(); ++it) {
cur_sym = (*it);
if(cur_sym != '(' && cur_sym != ')' && cur_sym != '*' && cur_sym != '|' && cur_sym != '.') {
cout<<cur_sym<<" "<<counter++<<endl;
symbols.insert(cur_sym);
}
}
*/

cout<<"\n\nThe required NFA has the transitions : \n\n";

NFA required_nfa;
required_nfa = re_to_nfa(re);
required_nfa.display();

return 0;
}
``````