C++ Program to Implement Suffix Tree

This C++ Program demonstrates the implementation of Suffix Tree.

Here is source code of the C++ Program to demonstrate the implementation of Suffix Tree. The C++ program is successfully compiled and run on a Linux system. The program output is also shown below.

  1. /*
  2.  * C++ Program to Implement Suffix Tree
  3.  */
  4. #include <iostream>
  5. #include <cstdlib>
  6. #include <cstring>
  7. #include <string>
  8.  
  9. using namespace std;
  10. typedef unsigned char byte;
  11. /* 
  12.  * SuffixNode Declaration
  13.  */
  14. class SuffixNode 
  15. {
  16.     public:
  17.         int depth, begin, end;
  18.         SuffixNode **children;
  19.         SuffixNode *parent, *suffixLink;  
  20.         /* 
  21.          * Constructor 
  22.          */
  23.         SuffixNode(int begin, int end, int depth, SuffixNode *parent) 
  24.         {
  25.             children = new SuffixNode* [38];
  26.             this->begin = begin;
  27.             this->end = end;
  28.             this->parent = parent;
  29.             this->depth = depth;
  30.         }
  31.  
  32.         bool contains(int d)
  33.         {
  34.             return depth <= d && d < depth + (end - begin);
  35.         }    
  36. };
  37. string alphabet; 
  38. int alphabetSize;
  39. int lcsLength;
  40. int lcsBeginIndex;
  41.  
  42. /* 
  43.  * Class SuffixTree Declaration
  44.  */
  45. class SuffixTree 
  46. {
  47.     public:
  48.         /* 
  49.          * Funtion to build suffix tree for given text 
  50.          */
  51.         SuffixNode *buildSuffixTree(string s) 
  52.         {
  53.             int n = s.length();
  54.             int *a = new int[n];
  55.             for (int i = 0; i < n; i++) 
  56.             {
  57.                 a[i] = alphabet.find(s.at(i));
  58.             }
  59.             SuffixNode *root = new SuffixNode(0, 0, 0, NULL);
  60.             SuffixNode *cn = root;
  61.             root->suffixLink = root;
  62.             SuffixNode *needsSuffixLink = NULL;
  63.             int lastRule = 0;
  64.             int j = 0;
  65.             for (int i = -1; i < n - 1; i++) 
  66.             {
  67.                 int cur = a[i + 1]; 
  68.                 for (; j <= i + 1; j++) 
  69.                 {
  70.                     int curDepth = i + 1 - j;
  71.                     if (lastRule != 3) 
  72.                     {
  73.                         if (cn->suffixLink != NULL) 
  74.                             cn = cn->suffixLink;
  75.                         else
  76.                             cn = cn->parent->suffixLink;
  77.                         int k = j + cn->depth;
  78.                         while (curDepth > 0 && !cn->contains(curDepth - 1)) 
  79.                         {
  80.                             k += cn->end - cn->begin;
  81.                             cn = cn->children[a[k]];
  82.                         }
  83.                     }
  84.                     if (!cn->contains(curDepth)) 
  85.                     { 
  86.                         if (needsSuffixLink != NULL) 
  87.                         {
  88.                             needsSuffixLink->suffixLink = cn;
  89.                             needsSuffixLink = NULL;
  90.                         }
  91.                         if (cn->children[cur] == NULL) 
  92.                         {
  93.                             cn->children[cur] = new SuffixNode(i + 1, n, curDepth, cn);
  94.                             lastRule = 2;
  95.                         }
  96.                         else 
  97.                         {    
  98.                             cn = cn->children[cur];
  99.                             lastRule = 3;
  100.                             break;
  101.                         }
  102.                     }
  103.                     else 
  104.                     { 
  105.                         int end = cn->begin + curDepth - cn->depth;
  106.                         if (a[end] != cur) 
  107.                         { 
  108.                             SuffixNode *newn = new SuffixNode(cn->begin, end, cn->depth, cn->parent);
  109.                             newn->children[cur] = new SuffixNode(i + 1, n, curDepth, newn);
  110.                             newn->children[a[end]] = cn;
  111.                             cn->parent->children[a[cn->begin]] = newn;
  112.                             if (needsSuffixLink != NULL) 
  113.                                 needsSuffixLink->suffixLink = newn;
  114.                             cn->begin = end;
  115.                             cn->depth = curDepth;
  116.                             cn->parent = newn;
  117.                             cn = needsSuffixLink = newn;
  118.                             lastRule = 2;
  119.                         } 
  120.                         else if (cn->end != n || (cn->begin - cn->depth) < j) 
  121.                         {
  122.                             lastRule = 3;
  123.                             break;
  124.                         }
  125.                         else
  126.                             lastRule = 1;                      
  127.                     }
  128.                 }
  129.             }
  130.             root->suffixLink = NULL;
  131.             return root;
  132.         }        
  133.         /* 
  134.          * Funtion to find longest common substring 
  135.          */
  136.         int findLCS(SuffixNode *node, int i1, int i2) 
  137.         {
  138.             if (node->begin <= i1 && i1 < node->end) 
  139.                 return 1;
  140.             if (node->begin <= i2 && i2 < node->end) 
  141.                 return 2;
  142.             int mask = 0;
  143.             for (int f = 0; f < alphabetSize; f++)
  144.             { 
  145.                 if (node->children[f] != NULL) 
  146.                 {   
  147.                     mask |= findLCS(node->children[f], i1, i2);
  148.                 }
  149.             }
  150.             if (mask == 3)
  151.             {
  152.                 int curLength = node->depth + node->end - node->begin;
  153.                 if (lcsLength < curLength) 
  154.                 {
  155.                     lcsLength = curLength;
  156.                     lcsBeginIndex = node->begin;
  157.                 }
  158.             }
  159.             return mask;
  160.         }
  161.  
  162.         /* 
  163.          * Funtion to find longest common substring 
  164.          */
  165.         void findLCS(string s1, string s2)
  166.         {
  167.             string x = "-";
  168.             string y = "#";
  169.             string ns1 = s1;
  170.             string ns2 = s2;
  171.             string s = s1.append(x.append(s2.append(y)));
  172.             SuffixNode *root = buildSuffixTree(s);
  173.             lcsLength = 0;
  174.             lcsBeginIndex = 0;               
  175.             findLCS(root, ns1.length(), ns1.length() + ns2.length() + 1);
  176.             bool chk = lcsLength > 0;
  177.             if (chk)
  178.             {
  179.                 cout<<"\nLongest substring is "<<s.substr(lcsBeginIndex , lcsLength);
  180.                 cout<<endl; 
  181.             }
  182.             else
  183.             {
  184.                 cout<<"No substring found"<<endl;
  185.             }
  186.         }
  187. };
  188.  
  189. /* 
  190.  * Main 
  191. */
  192. int main()
  193. {
  194.     alphabet = "abcdefghijklmnopqrstuvwxyz1234567890-#";
  195.     alphabetSize = alphabet.length();
  196.     string s1,s2;
  197.     cout<<"Finding longest common substring using suffix trees\n";
  198.     cout<<"Enter 1st String: ";
  199.     cin>>s1; 
  200.     cout<<"Enter 2nd String: ";
  201.     cin>>s2;
  202.     SuffixTree st;
  203.     st.findLCS(s1, s2);         
  204.     return 0;
  205. }

$ g++ suffixtree.cpp
$ a.out
 
Finding longest common substring using suffix trees
Enter 1st String: sanfoundry
Enter 2nd String: founder
 
Longest substring is found
 
 
------------------
(program exited with code: 1)
Press return to continue

Sanfoundry Global Education & Learning Series – 1000 C++ Programs.

advertisement
If you wish to look at all C++ Programming examples, go to C++ Programs.

advertisement
Subscribe to our Newsletters (Subject-wise). Participate in the Sanfoundry Certification to get free Certificate of Merit. Join our social networks below and stay updated with latest contests, videos, internships and jobs!

Youtube | Telegram | LinkedIn | Instagram | Facebook | Twitter | Pinterest
Manish Bhojasia - Founder & CTO at Sanfoundry
I’m Manish - Founder and CTO at Sanfoundry. I’ve been working in tech for over 25 years, with deep focus on Linux kernel, SAN technologies, Advanced C, Full Stack and Scalable website designs.

You can connect with me on LinkedIn, watch my Youtube Masterclasses, or join my Telegram tech discussions.

If you’re in your 20s–40s and exploring new directions in your career, I also offer mentoring. Learn more here.