Python: word_count

This commit is contained in:
Dmitry Kokorin 2021-06-08 16:30:49 +03:00
parent 6223031c18
commit 0dd5b31529
4 changed files with 201 additions and 0 deletions

View file

@ -0,0 +1 @@
{"track":"python","exercise":"word-count","id":"5ff53f015ab940dfa8ca06bcd9c6cd76","url":"https://exercism.io/my/solutions/5ff53f015ab940dfa8ca06bcd9c6cd76","handle":"DmitryKokorin","is_requester":true,"auto_approve":false}

View file

@ -0,0 +1,78 @@
# Word Count
Given a phrase, count the occurrences of each _word_ in that phrase.
For the purposes of this exercise you can expect that a _word_ will always be one of:
1. A _number_ composed of one or more ASCII digits (e.g. "0" or "1234") OR
2. A _simple word_ composed of one or more ASCII letters (e.g. "a" or "they") OR
3. A _contraction_ of two _simple words_ joined by a single apostrophe (e.g. "it's" or "they're")
When counting words you can assume the following rules:
1. The count is _case insensitive_ (e.g. "You", "you", and "YOU" are 3 uses of the same word)
2. The count is _unordered_; the tests will ignore how words and counts are ordered
3. Other than the apostrophe in a _contraction_ all forms of _punctuation_ are ignored
4. The words can be separated by _any_ form of whitespace (e.g. "\t", "\n", " ")
For example, for the phrase `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` the count would be:
```text
that's: 1
the: 2
password: 2
123: 1
cried: 1
special: 1
agent: 1
so: 1
i: 1
fled: 1
```
## Exception messages
Sometimes it is necessary to raise an exception. When you do this, you should include a meaningful error message to
indicate what the source of the error is. This makes your code more readable and helps significantly with debugging. Not
every exercise will require you to raise an exception, but for those that do, the tests will only pass if you include
a message.
To raise an exception with a message, pass the message as an argument to the exception type. For example, instead of
`raise Exception`, you should write:
```python
raise Exception("Meaningful message indicating the source of the error")
```
## Running the tests
To run the tests, run `pytest word_count_test.py`
Alternatively, you can tell Python to run the pytest module:
`python -m pytest word_count_test.py`
### Common `pytest` options
- `-v` : enable verbose output
- `-x` : stop running tests on first failure
- `--ff` : run failures from the previous test run before running other test cases
For other options, see `python -m pytest -h`
## Submitting Exercises
When submitting an exercise, make sure the solution is in the `$EXERCISM_WORKSPACE/python/word-count` directory.
You can find your Exercism workspace by running `exercism debug` and looking for the line that starts with `Workspace`.
For more detailed information about running tests, code style and linting,
please see [Running the Tests](http://exercism.io/tracks/python/tests).
## Source
This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
## Submitting Incomplete Solutions
It's possible to submit an incomplete solution so you can see how others have completed the exercise.

View file

@ -0,0 +1,9 @@
from collections import Counter
import re
# A word is a run of letters/digits (``[^\W_]`` is ``\w`` without the
# underscore), optionally followed by ONE apostrophe and another such run.
# This keeps contractions such as "don't" together, while apostrophes used
# as quotation marks, underscores and all other punctuation act as separators.
WORDS_REGEXP = re.compile(r"([^\W_]+('[^\W_]+)?)")


def count_words(sentence):
    """Count how often each word occurs in *sentence*, ignoring case.

    Words are the substrings matched by ``WORDS_REGEXP``; each one is
    lower-cased before counting, so "Go", "GO" and "go" share one entry.

    Args:
        sentence: The text to scan (a ``str``).

    Returns:
        A ``collections.Counter`` mapping each lower-cased word to its number
        of occurrences. It is empty when no word is found.
    """
    # ``group(0)`` is the whole match, so the pattern's capture groups never
    # need to be unpacked, and the generator avoids a throwaway list.
    return Counter(
        match.group(0).lower() for match in WORDS_REGEXP.finditer(sentence)
    )

View file

@ -0,0 +1,113 @@
import unittest
from word_count import count_words
# Tests adapted from `problem-specifications/canonical-data.json`
class WordCountTest(unittest.TestCase):
    """Behavioural tests for ``count_words``.

    Every test passes a phrase to ``count_words`` and compares the result
    with a plain ``dict`` of expected lower-cased words and their counts.
    """

    def test_count_one_word(self):
        """A single word is counted once."""
        self.assertEqual(count_words("word"), {"word": 1})

    def test_count_one_of_each_word(self):
        """Distinct words each get a count of one."""
        self.assertEqual(count_words("one of each"), {"one": 1, "of": 1, "each": 1})

    def test_multiple_occurrences_of_a_word(self):
        """Repeated words are accumulated under a single key."""
        self.assertEqual(
            count_words("one fish two fish red fish blue fish"),
            {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1},
        )

    def test_handles_cramped_lists(self):
        """Commas without surrounding whitespace still separate words."""
        self.assertEqual(count_words("one,two,three"), {"one": 1, "two": 1, "three": 1})

    def test_handles_expanded_lists(self):
        """A comma followed by a newline separates words."""
        self.assertEqual(
            count_words("one,\ntwo,\nthree"), {"one": 1, "two": 1, "three": 1}
        )

    def test_ignore_punctuation(self):
        """Punctuation is dropped and never becomes part of a word."""
        self.assertEqual(
            count_words("car: carpet as java: javascript!!&@$%^&"),
            {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1},
        )

    def test_include_numbers(self):
        """Digit runs are counted as words."""
        self.assertEqual(
            count_words("testing, 1, 2 testing"), {"testing": 2, "1": 1, "2": 1}
        )

    def test_normalize_case(self):
        """Counting is case-insensitive; keys are lower-case."""
        self.assertEqual(count_words("go Go GO Stop stop"), {"go": 3, "stop": 2})

    def test_with_apostrophes(self):
        """An apostrophe inside a contraction stays part of the word."""
        self.assertEqual(
            count_words("First: don't laugh. Then: don't cry."),
            {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1},
        )

    def test_with_quotations(self):
        """Apostrophes used as quotation marks are stripped from the word."""
        self.assertEqual(
            count_words("Joe can't tell between 'large' and large."),
            {"joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2, "and": 1},
        )

    def test_substrings_from_the_beginning(self):
        """Words that are prefixes of other words are counted separately."""
        self.assertEqual(
            count_words("Joe can't tell between app, apple and a."),
            {
                "joe": 1,
                "can't": 1,
                "tell": 1,
                "between": 1,
                "app": 1,
                "apple": 1,
                "and": 1,
                "a": 1,
            },
        )

    def test_multiple_spaces_not_detected_as_a_word(self):
        """Leading and repeated spaces never produce an empty word."""
        # The run of several spaces between the words is the point of this
        # test; a single space would not exercise it.
        self.assertEqual(
            count_words(" multiple   whitespaces"), {"multiple": 1, "whitespaces": 1}
        )

    def test_alternating_word_separators_not_detected_as_a_word(self):
        """Mixed commas, newlines, spaces and quotes yield only real words."""
        self.assertEqual(
            count_words(",\n,one,\n ,two \n 'three'"), {"one": 1, "two": 1, "three": 1}
        )

    # Additional tests for this track

    def test_tabs(self):
        """Tab characters separate words just like spaces do."""
        # Tabs are written as explicit ``\t`` escapes so they cannot be
        # silently converted to spaces by an editor or formatter.
        self.assertEqual(
            count_words(
                "rah rah ah ah ah\troma roma ma\tga ga oh la la\twant your bad romance"
            ),
            {
                "rah": 2,
                "ah": 3,
                "roma": 2,
                "ma": 1,
                "ga": 2,
                "oh": 1,
                "la": 2,
                "want": 1,
                "your": 1,
                "bad": 1,
                "romance": 1,
            },
        )

    def test_non_alphanumeric(self):
        """Underscores act as separators rather than word characters."""
        self.assertEqual(
            count_words("hey,my_spacebar_is_broken"),
            {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1},
        )

    def test_multiple_apostrophes_ignored(self):
        """Runs of apostrophes around a word are discarded."""
        self.assertEqual(count_words("''hey''"), {"hey": 1})


# Allow running this file directly as a script in addition to via pytest.
if __name__ == "__main__":
    unittest.main()