From 0dd5b3152904e7d342e564c6e32210f77f4b2e28 Mon Sep 17 00:00:00 2001 From: Dmitry Kokorin Date: Tue, 8 Jun 2021 16:30:49 +0300 Subject: [PATCH] Python: word_count --- python/word-count/.exercism/metadata.json | 1 + python/word-count/README.md | 78 +++++++++++++++ python/word-count/word_count.py | 9 ++ python/word-count/word_count_test.py | 113 ++++++++++++++++++++++ 4 files changed, 201 insertions(+) create mode 100644 python/word-count/.exercism/metadata.json create mode 100644 python/word-count/README.md create mode 100644 python/word-count/word_count.py create mode 100644 python/word-count/word_count_test.py diff --git a/python/word-count/.exercism/metadata.json b/python/word-count/.exercism/metadata.json new file mode 100644 index 0000000..a6aa901 --- /dev/null +++ b/python/word-count/.exercism/metadata.json @@ -0,0 +1 @@ +{"track":"python","exercise":"word-count","id":"5ff53f015ab940dfa8ca06bcd9c6cd76","url":"https://exercism.io/my/solutions/5ff53f015ab940dfa8ca06bcd9c6cd76","handle":"DmitryKokorin","is_requester":true,"auto_approve":false} \ No newline at end of file diff --git a/python/word-count/README.md b/python/word-count/README.md new file mode 100644 index 0000000..ab9e824 --- /dev/null +++ b/python/word-count/README.md @@ -0,0 +1,78 @@ +# Word Count + +Given a phrase, count the occurrences of each _word_ in that phrase. + +For the purposes of this exercise you can expect that a _word_ will always be one of: + +1. A _number_ composed of one or more ASCII digits (e.g. "0" or "1234") OR +2. A _simple word_ composed of one or more ASCII letters (e.g. "a" or "they") OR +3. A _contraction_ of two _simple words_ joined by a single apostrophe (e.g. "it's" or "they're") + +When counting words you can assume the following rules: + +1. The count is _case insensitive_ (e.g. "You", "you", and "YOU" are 3 uses of the same word) +2. The count is _unordered_; the tests will ignore how words and counts are ordered +3. 
Other than the apostrophe in a _contraction_ all forms of _punctuation_ are ignored +4. The words can be separated by _any_ form of whitespace (e.g. "\t", "\n", " ") + +For example, for the phrase `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` the count would be: + +```text +that's: 1 +the: 2 +password: 2 +123: 1 +cried: 1 +special: 1 +agent: 1 +so: 1 +i: 1 +fled: 1 +``` + + +## Exception messages + +Sometimes it is necessary to raise an exception. When you do this, you should include a meaningful error message to +indicate what the source of the error is. This makes your code more readable and helps significantly with debugging. Not +every exercise will require you to raise an exception, but for those that do, the tests will only pass if you include +a message. + +To raise a message with an exception, just write it as an argument to the exception type. For example, instead of +`raise Exception`, you should write: + +```python +raise Exception("Meaningful message indicating the source of the error") +``` + +## Running the tests + +To run the tests, run `pytest word_count_test.py` + +Alternatively, you can tell Python to run the pytest module: +`python -m pytest word_count_test.py` + +### Common `pytest` options + +- `-v` : enable verbose output +- `-x` : stop running tests on first failure +- `--ff` : run failures from previous test before running other test cases + +For other options, see `python -m pytest -h` + +## Submitting Exercises + +Note that, when trying to submit an exercise, make sure the solution is in the `$EXERCISM_WORKSPACE/python/word-count` directory. + +You can find your Exercism workspace by running `exercism debug` and looking for the line that starts with `Workspace`. + +For more detailed information about running tests, code style and linting, +please see [Running the Tests](http://exercism.io/tracks/python/tests). 
from collections import Counter
import re

# A word is a run of alphanumeric characters (``\w`` without the underscore),
# optionally followed by a single apostrophe and another such run, so that
# contractions like "don't" stay in one piece. Group 1 spans the whole word.
WORDS_REGEXP = re.compile(r"([^\W_]+('[^\W_]+)?)")


def count_words(sentence):
    """Return a ``Counter`` mapping each lower-cased word to its frequency.

    Words are located with ``WORDS_REGEXP``; every character that the
    pattern does not match (whitespace, punctuation, underscores, stray
    apostrophes) acts purely as a separator and is never counted.
    """
    matches = WORDS_REGEXP.finditer(sentence)
    # Lower-case each hit so that "Go", "GO" and "go" share one key.
    normalized = (match.group(1).lower() for match in matches)
    return Counter(normalized)
"1": 1, "2": 1} + ) + + def test_normalize_case(self): + self.assertEqual(count_words("go Go GO Stop stop"), {"go": 3, "stop": 2}) + + def test_with_apostrophes(self): + self.assertEqual( + count_words("First: don't laugh. Then: don't cry."), + {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1}, + ) + + def test_with_quotations(self): + self.assertEqual( + count_words("Joe can't tell between 'large' and large."), + {"joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2, "and": 1}, + ) + + def test_substrings_from_the_beginning(self): + self.assertEqual( + count_words("Joe can't tell between app, apple and a."), + { + "joe": 1, + "can't": 1, + "tell": 1, + "between": 1, + "app": 1, + "apple": 1, + "and": 1, + "a": 1, + }, + ) + + def test_multiple_spaces_not_detected_as_a_word(self): + self.assertEqual( + count_words(" multiple whitespaces"), {"multiple": 1, "whitespaces": 1} + ) + + def test_alternating_word_separators_not_detected_as_a_word(self): + self.assertEqual( + count_words(",\n,one,\n ,two \n 'three'"), {"one": 1, "two": 1, "three": 1} + ) + + # Additional tests for this track + + def test_tabs(self): + self.assertEqual( + count_words( + "rah rah ah ah ah roma roma ma ga ga oh la la want your bad romance" + ), + { + "rah": 2, + "ah": 3, + "roma": 2, + "ma": 1, + "ga": 2, + "oh": 1, + "la": 2, + "want": 1, + "your": 1, + "bad": 1, + "romance": 1, + }, + ) + + def test_non_alphanumeric(self): + self.assertEqual( + count_words("hey,my_spacebar_is_broken"), + {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1}, + ) + + def test_multiple_apostrophes_ignored(self): + self.assertEqual(count_words("''hey''"), {"hey": 1}) + + +if __name__ == "__main__": + unittest.main()