73 lines
1.4 KiB
C++
73 lines
1.4 KiB
C++
#include "word_count.h"
|
|
|
|
#include <algorithm>
|
|
|
|
|
|
namespace word_count {
|
|
|
|
using namespace std;
|
|
|
|
|
|
Words words(const string &str, const locale &loc)
|
|
{
|
|
Words result;
|
|
|
|
enum {
|
|
|
|
InsideWord,
|
|
OutsideWord
|
|
|
|
} state(OutsideWord);
|
|
|
|
|
|
string::const_iterator begin, end, i;
|
|
i = begin = end = str.begin();
|
|
|
|
for (; i != str.end(); ++i) {
|
|
|
|
bool is_alpha_or_digit = isalpha(*i, loc) || isdigit(*i, loc);
|
|
bool is_word_char = is_alpha_or_digit || *i == '\'';
|
|
|
|
switch (state) {
|
|
|
|
case OutsideWord:
|
|
|
|
if (is_alpha_or_digit) {
|
|
|
|
state = InsideWord;
|
|
begin = i;
|
|
end = next(i);
|
|
}
|
|
|
|
break;
|
|
|
|
case InsideWord:
|
|
|
|
if (is_alpha_or_digit) {
|
|
|
|
end = next(i);
|
|
}
|
|
|
|
if (!is_word_char || next(i) == str.end()) {
|
|
|
|
state = OutsideWord;
|
|
|
|
if (begin != end) {
|
|
|
|
string word;
|
|
word.reserve(distance(begin, end));
|
|
transform(begin, end, back_inserter(word),
|
|
[=](char c){return tolower(c, loc);});
|
|
|
|
result[word]++;
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
}
|