Trie 字典樹 – 陪你刷題

Trie 是一種樹狀結構,可以用來在眾多字串中快速搜尋某個字串。當然這樣問題也可以用 hash table 來解決,但是 Trie 在解決某些問題上有他的優勢,

  • 找出每個有特定 prefix 的字串
  • 按照字典順序列出所有字串

hash table 面對上述問題,有兩個缺點

  1. 如果 input 大部份都擁有一樣的 prefix ,hash table 相對 trie 會耗費較多記憶體空間。
  2. hash table 可能發生碰撞,這會使得搜尋的 time complexity 糟糕到 O (n) ,n 是總共插入的資料數量,然而 Trie 執行搜尋可以保持時間複雜度僅有 O (k),k 是搜尋字串的字元數量。

Trie 樹利用字串之間的共同前綴,將重複的前綴放在同一條路徑上,即可畫出如下圖的樹狀架構。

Trie 在現實生活中的應用相當常見,像是搜尋引擎的關鍵字建議或是 word 上的錯誤字訂正。

以下來看兩題 Leetcode 範例:

Leetcode #208 Implement a Trie

  • Trie 的 root 是不含資料
  • 每個 node 都包含一個 boolean 變數,紀錄該 node 是不是一個單字的結尾。

方法 1

  • 有 constructor 和 Destructor ,該題討論區很多解答都沒有完整的這樣寫。
  • 題目有說字串組成只包含小寫英文,可以直接透過 vector 劃分好大小。
class Trie {
public:
    /** Initialize your data structure here. */
    Trie() {
        root = new TrieNode();
    }

    /** Inserts a word into the trie. */
    void insert(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (curr->children[ch-'a'] == nullptr)
            {
                struct TrieNode *newNode = new TrieNode();
                curr->children[ch-'a'] = newNode;
            }
            curr = curr->children[ch-'a'];
        }
        curr->isEndOfWord = true;
    }

    /** Returns if the word is in the trie. */
    bool search(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (curr->children[ch-'a'] == nullptr)
                return false;
            curr = curr->children[ch-'a'];
        }
        return curr->isEndOfWord;
    }

    /** Returns if there is any word in the trie that starts with the given prefix. */
    bool startsWith(string prefix) {
        TrieNode *curr = root;
        for (auto ch:prefix)
        {
            if (curr->children[ch-'a'] == nullptr)
                return false;
            curr = curr->children[ch-'a'];
        }
        return true;
    }
private:
     struct TrieNode {
        bool isEndOfWord;
        vector<TrieNode*> children;

        TrieNode() {
            isEndOfWord = false;
            children = vector<TrieNode*>(26, nullptr);
        }
        ~TrieNode() {
            for (auto child:children)
            {
                if (child) delete child;
            }
        }
    };
    struct TrieNode *root;
};

方法 2 unordered_map

  • 如果可能字母只有英文字母,當然採取方法 1 的 array 即可,但如果可能的字元有很多可能,採取 unordered_map 會比較好
class Trie {
public:
    /** Initialize your data structure here. */
    Trie() {
        root = new TrieNode();
    }

    /** Inserts a word into the trie. */
    void insert(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (!curr->children.count(ch))
            {
                struct TrieNode *newNode = new TrieNode();
                curr->children[ch] = newNode;
            }
            curr = curr->children[ch];
        }
        curr->isEndOfWord = true;
    }

    /** Returns if the word is in the trie. */
    bool search(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (!curr->children.count(ch))
                return false;
            curr = curr->children[ch];
        }
        return curr->isEndOfWord;
    }

    /** Returns if there is any word in the trie that starts with the given prefix. */
    bool startsWith(string prefix) {
        TrieNode *curr = root;
        for (auto ch:prefix)
        {
            if (!curr->children.count(ch))
                return false;
            curr = curr->children[ch];
        }
        return true;
    }
private:
     struct TrieNode {
        bool isEndOfWord;
        unordered_map<char, TrieNode*> children;

        TrieNode() {
            isEndOfWord = false;
        }
        ~TrieNode() {
            for (auto child:children)
            {
                if (child.second) delete child.second;
            }
        }
    };
    TrieNode *root;
};

Leetcode #211 Design Add and Search Words Data Structure

這題為上一題的延伸,差別在於要處理 '.' 這個特殊字元。

方法 1 backtracking

  • 透過 queue 紀錄所有可能選項,只要遇到 '.' ,無條件將所有的 children 節點加入
class WordDictionary {
public:
    /** Initialize your data structure here. */
    WordDictionary() {
        root = new TrieNode ();
    }

    void addWord(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (!curr->children.count(ch))
            {
                TrieNode *newNode = new TrieNode();
                curr->children[ch] = newNode;
            }
            curr = curr->children[ch];
        }
        curr->isEndOfWord = true;
    }

    bool search(string word) {
        queue<TrieNode *> qu;
        qu.push(root);

        for (auto ch:word)
        {
            int size = qu.size();
            for (int i=0; i<size; i++)
            {
                TrieNode *curr = qu.front();
                if (ch == '.')
                {
                    for (auto child:curr->children)
                    {
                        qu.push (child.second);
                    }
                }
                else
                {
                    if (curr->children.count(ch))
                    {
                        qu.push (curr->children[ch]);
                    }
                }
                qu.pop();
            }
        }
        // check all vector node's isEndOfWord
        while (!qu.empty())
        {
            TrieNode *curr = qu.front();
            if (curr->isEndOfWord)
                return true;
            qu.pop();
        }
        return false;
    }

private:
    struct TrieNode {
        bool isEndOfWord;
        unordered_map<char, TrieNode*> children;
        TrieNode () : isEndOfWord(false) { }
        ~TrieNode () {
            for (auto child:children)
            {
                if (child.second) delete child.second;
            }
        }
    };
    TrieNode *root;
};

方法 2 遞迴法

class WordDictionary {
public:
    /** Initialize your data structure here. */
    WordDictionary() {
        root = new TrieNode ();
    }

    void addWord(string word) {
        TrieNode *curr = root;
        for (auto ch:word)
        {
            if (!curr->children.count(ch))
            {
                TrieNode *newNode = new TrieNode();
                curr->children[ch] = newNode;
            }
            curr = curr->children[ch];
        }
        curr->isEndOfWord = true;
    }

    bool search(string word) {
        return searchWord (word.c_str(), root);
    }

private:
    struct TrieNode {
        bool isEndOfWord;
        unordered_map<char, TrieNode*> children;
        TrieNode () : isEndOfWord(false) { }
        ~TrieNode () {
            for (auto child:children)
            {
                if (child.second) delete child.second;
            }
        }
    };
    TrieNode *root;
    bool searchWord (const char* word, TrieNode *root)
    {
        for (int i=0; root && word[i]; i++)
        {
            if (word[i] == '.')
            {
                bool flag = false;
                for (auto child:root->children)
                {

                    flag = flag | searchWord (word+i+1, child.second);
                }
                return flag;
            }
            else
            {
                root = root->children[word[i]];
            }
        }
        return root && root->isEndOfWord;
    }
};

Reference

  1. 208. Implement Trie (Prefix Tree) - Leetcode

Updated on 2021-02-19 22:04:23 星期五