Python: word_count
This commit is contained in:
parent
6223031c18
commit
0dd5b31529
4 changed files with 201 additions and 0 deletions
1
python/word-count/.exercism/metadata.json
Normal file
1
python/word-count/.exercism/metadata.json
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
{"track":"python","exercise":"word-count","id":"5ff53f015ab940dfa8ca06bcd9c6cd76","url":"https://exercism.io/my/solutions/5ff53f015ab940dfa8ca06bcd9c6cd76","handle":"DmitryKokorin","is_requester":true,"auto_approve":false}
|
||||||
78
python/word-count/README.md
Normal file
78
python/word-count/README.md
Normal file
|
|
@ -0,0 +1,78 @@
|
||||||
|
# Word Count
|
||||||
|
|
||||||
|
Given a phrase, count the occurrences of each _word_ in that phrase.
|
||||||
|
|
||||||
|
For the purposes of this exercise you can expect that a _word_ will always be one of:
|
||||||
|
|
||||||
|
1. A _number_ composed of one or more ASCII digits (e.g. "0" or "1234") OR
|
||||||
|
2. A _simple word_ composed of one or more ASCII letters (e.g. "a" or "they") OR
|
||||||
|
3. A _contraction_ of two _simple words_ joined by a single apostrophe (e.g. "it's" or "they're")
|
||||||
|
|
||||||
|
When counting words you can assume the following rules:
|
||||||
|
|
||||||
|
1. The count is _case insensitive_ (e.g. "You", "you", and "YOU" are 3 uses of the same word)
|
||||||
|
2. The count is _unordered_; the tests will ignore how words and counts are ordered
|
||||||
|
3. Other than the apostrophe in a _contraction_ all forms of _punctuation_ are ignored
|
||||||
|
4. The words can be separated by _any_ form of whitespace (e.g. "\t", "\n", " ")
|
||||||
|
|
||||||
|
For example, for the phrase `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` the count would be:
|
||||||
|
|
||||||
|
```text
|
||||||
|
that's: 1
|
||||||
|
the: 2
|
||||||
|
password: 2
|
||||||
|
123: 1
|
||||||
|
cried: 1
|
||||||
|
special: 1
|
||||||
|
agent: 1
|
||||||
|
so: 1
|
||||||
|
i: 1
|
||||||
|
fled: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Exception messages
|
||||||
|
|
||||||
|
Sometimes it is necessary to raise an exception. When you do this, you should include a meaningful error message to
|
||||||
|
indicate what the source of the error is. This makes your code more readable and helps significantly with debugging. Not
|
||||||
|
every exercise will require you to raise an exception, but for those that do, the tests will only pass if you include
|
||||||
|
a message.
|
||||||
|
|
||||||
|
To include a message with an exception, pass it as an argument to the exception type. For example, instead of
|
||||||
|
`raise Exception`, you should write:
|
||||||
|
|
||||||
|
```python
|
||||||
|
raise Exception("Meaningful message indicating the source of the error")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Running the tests
|
||||||
|
|
||||||
|
To run the tests, run `pytest word_count_test.py`
|
||||||
|
|
||||||
|
Alternatively, you can tell Python to run the pytest module:
|
||||||
|
`python -m pytest word_count_test.py`
|
||||||
|
|
||||||
|
### Common `pytest` options
|
||||||
|
|
||||||
|
- `-v` : enable verbose output
|
||||||
|
- `-x` : stop running tests on first failure
|
||||||
|
- `--ff` : run the failures from the previous test run first, then the remaining test cases
|
||||||
|
|
||||||
|
For other options, see `python -m pytest -h`
|
||||||
|
|
||||||
|
## Submitting Exercises
|
||||||
|
|
||||||
|
When submitting an exercise, make sure the solution is in the `$EXERCISM_WORKSPACE/python/word-count` directory.
|
||||||
|
|
||||||
|
You can find your Exercism workspace by running `exercism debug` and looking for the line that starts with `Workspace`.
|
||||||
|
|
||||||
|
For more detailed information about running tests, code style and linting,
|
||||||
|
please see [Running the Tests](http://exercism.io/tracks/python/tests).
|
||||||
|
|
||||||
|
## Source
|
||||||
|
|
||||||
|
This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
|
||||||
|
|
||||||
|
## Submitting Incomplete Solutions
|
||||||
|
|
||||||
|
It's possible to submit an incomplete solution so you can see how others have completed the exercise.
|
||||||
9
python/word-count/word_count.py
Normal file
9
python/word-count/word_count.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
from collections import Counter
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Matches a single word: a run of word characters other than underscore
# (letters and digits), optionally followed by one apostrophe and another such
# run, so contractions like "don't" stay in one piece. An apostrophe that is
# not sandwiched between two such runs (e.g. a quotation mark) is never part
# of a match.
WORDS_REGEXP = re.compile(r"([^\W_]+('[^\W_]+)?)")


def count_words(sentence):
    """Count the occurrences of each word in *sentence*, ignoring case.

    Words are the non-overlapping matches of ``WORDS_REGEXP``; everything
    between matches (whitespace, punctuation, underscores, stray apostrophes)
    acts as a separator.

    Args:
        sentence: The text to scan.

    Returns:
        A ``collections.Counter`` mapping each lower-cased word to the number
        of times it occurs. The counter is empty when no word is found.
    """
    # group(0) is the whole match, so the result does not depend on how many
    # capturing groups the pattern happens to contain.
    return Counter(
        match.group(0).lower() for match in WORDS_REGEXP.finditer(sentence)
    )
|
||||||
113
python/word-count/word_count_test.py
Normal file
113
python/word-count/word_count_test.py
Normal file
|
|
@ -0,0 +1,113 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from word_count import count_words
|
||||||
|
|
||||||
|
# Tests adapted from `problem-specifications//canonical-data.json`
|
||||||
|
|
||||||
|
|
||||||
|
class WordCountTest(unittest.TestCase):
    """Checks ``count_words`` against the word-count exercise rules.

    Each test feeds one phrase to ``count_words`` and compares the result
    with the expected word -> count mapping.
    """

    def test_count_one_word(self):
        self.assertEqual(count_words("word"), {"word": 1})

    def test_count_one_of_each_word(self):
        self.assertEqual(count_words("one of each"), {"one": 1, "of": 1, "each": 1})

    def test_multiple_occurrences_of_a_word(self):
        self.assertEqual(
            count_words("one fish two fish red fish blue fish"),
            {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1},
        )

    def test_handles_cramped_lists(self):
        self.assertEqual(count_words("one,two,three"), {"one": 1, "two": 1, "three": 1})

    def test_handles_expanded_lists(self):
        self.assertEqual(
            count_words("one,\ntwo,\nthree"), {"one": 1, "two": 1, "three": 1}
        )

    def test_ignore_punctuation(self):
        self.assertEqual(
            count_words("car: carpet as java: javascript!!&@$%^&"),
            {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1},
        )

    def test_include_numbers(self):
        self.assertEqual(
            count_words("testing, 1, 2 testing"), {"testing": 2, "1": 1, "2": 1}
        )

    def test_normalize_case(self):
        self.assertEqual(count_words("go Go GO Stop stop"), {"go": 3, "stop": 2})

    def test_with_apostrophes(self):
        self.assertEqual(
            count_words("First: don't laugh. Then: don't cry."),
            {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1},
        )

    def test_with_quotations(self):
        self.assertEqual(
            count_words("Joe can't tell between 'large' and large."),
            {"joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2, "and": 1},
        )

    def test_substrings_from_the_beginning(self):
        self.assertEqual(
            count_words("Joe can't tell between app, apple and a."),
            {
                "joe": 1,
                "can't": 1,
                "tell": 1,
                "between": 1,
                "app": 1,
                "apple": 1,
                "and": 1,
                "a": 1,
            },
        )

    def test_multiple_spaces_not_detected_as_a_word(self):
        # The input must contain a run of several spaces, otherwise this test
        # does not exercise what its name promises.
        self.assertEqual(
            count_words(" multiple   whitespaces"), {"multiple": 1, "whitespaces": 1}
        )

    def test_alternating_word_separators_not_detected_as_a_word(self):
        self.assertEqual(
            count_words(",\n,one,\n ,two \n 'three'"), {"one": 1, "two": 1, "three": 1}
        )

    # Additional tests for this track

    def test_tabs(self):
        # Tabs are written as explicit "\t" escapes so they cannot be silently
        # converted to spaces by an editor or a formatter.
        self.assertEqual(
            count_words(
                "rah rah ah ah ah\troma roma ma\tga ga oh la la\twant your bad romance"
            ),
            {
                "rah": 2,
                "ah": 3,
                "roma": 2,
                "ma": 1,
                "ga": 2,
                "oh": 1,
                "la": 2,
                "want": 1,
                "your": 1,
                "bad": 1,
                "romance": 1,
            },
        )

    def test_non_alphanumeric(self):
        self.assertEqual(
            count_words("hey,my_spacebar_is_broken"),
            {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1},
        )

    def test_multiple_apostrophes_ignored(self):
        self.assertEqual(count_words("''hey''"), {"hey": 1})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the suite through unittest's command-line runner when this module
    # is executed directly; importing it has no side effects.
    unittest.main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue