面试题 17.13

Oh, no! You have accidentally removed all spaces, punctuation, and capitalization in a lengthy document. A sentence like "I reset the computer. It still didn't boot!" became "iresetthecomputeritstilldidntboot''. You'll deal with the punctuation and capi­talization later; right now you need to re-insert the spaces. Most of the words are in a dictionary but a few are not. Given a dictionary (a list of strings) and the document (a string), design an algorithm to unconcatenate the document in a way that minimizes the number of unrecognized characters. Return the number of unrecognized characters.

Note: This problem is slightly different from the original one in the book.

Example:

Input: dictionary = ["looked","just","like","her","brother"] sentence = "jesslookedjustliketimherbrother" Output: 7 Explanation: After unconcatenating, we got "jess looked just like tim her brother", which containing 7 unrecognized characters. Note:

0 <= len(sentence) <= 1000 The total number of characters in dictionary is less than or equal to 150000. There are only lowercase letters in dictionary and sentence.

Solutions

  1. dynamic programming

  2. number of characters not in dictionary = len(sentence) - maximum length of words can be partitioned in the sentence with words in dictionary.

class Solution {
public:
    int respace(vector<string>& dictionary, string sentence) {
        int n = sentence.size();
        string_view sv(sentence);
        vector<size_t> dp(n + 1, 0);
        for (int j = 1; j <= n; j++) {
            for (auto & w : dictionary) {
                if (!w.size() || j < w.size()) continue;
                if (sv.substr(j - w.size(), w.size()) == w)
                    dp[j] = max(dp[j], dp[j - w.size()] + w.size());
            }
            dp[j] = max(dp[j], dp[j - 1]);
        }

        return sv.size() - dp[n];
    }
};
  1. dynamic programming with trie

  2. use tries(suffix) to speed up the searching of words in the dictionary.

struct Trie {
    Trie* nodes[26] = {nullptr};
    int wlen = 0;
    ~Trie() {
        for (auto pnode : nodes)
            if (pnode) delete pnode;
    }
    template <typename It>
    void insert(It lo, It hi) {
        if (lo == hi) return;
        Trie * cur = this;
        int len = 0;
        while (lo != hi) {
            len++;
            auto c = *lo - 'a'; ++lo;
            if (!cur->nodes[c])
                cur->nodes[c] = new Trie;
            cur = cur->nodes[c];
        }
        cur->wlen = len; // wlen > 0 represents this word exists
    }
};

class Solution {
public:
    int respace(vector<string>& dictionary, string s) {
        int n = s.size(); if (!n) return 0;

        Trie trie;
        for (auto & w : dictionary)
            trie.insert(w.rbegin(), w.rend());

        vector<size_t> dp(n + 1);
        for (int j = 1; j <= n; j++) {
            Trie * cur = &trie;
            auto cit = s.rend() - j;
            while (cit != s.rend() && cur->nodes[*cit - 'a']) {
                cur = cur->nodes[*cit - 'a'];
                int wlen = cur->wlen;
                if (wlen > 0)
                    dp[j] = max(dp[j], dp[j - wlen] + wlen);
                ++cit;
            }
            dp[j] = max(dp[j], dp[j - 1]);
        }

        return s.size() - dp[n];
    }
};

Last updated

Was this helpful?