#!/usr/bin/env python
# wordCount.py -- Written by Coty R Miller 13/Feb/18
# Usage: ./wordCount.py bookYouWishToProcess.TXT
# This code is designed to count and list the words within a book
# or any generic text file. For example, in the book 1984 the word
# 'the' appears 5716 times.
import sys
# removePuncuation(ArrayOfStrings)
# return ArrayOfStrings
def removePuncuation(book):
    """Return a new list with punctuation stripped from every word.

    book -- sequence of strings (the words to clean).

    Returns a list of strings with the same length and order as the
    input.  A word consisting only of omitted symbols becomes the empty
    string.  Apostrophes are not in the omit list, so contractions such
    as "it's" are left intact.
    """
    # Symbols to omit; digits could be added here as well.  The backslash
    # is written as an explicit escape so the literal means the same thing
    # on every Python version (a bare '\|' is an invalid escape sequence).
    symbolsToOmit = '`~!@#$%^&*()-_=+[{]}\\|;:",<.>/?'
    # Rebuild each word from only the characters that are not omitted.
    return [
        "".join(ch for ch in word if ch not in symbolsToOmit)
        for word in book
    ]
# countWords(ArrayOfStrings)
# return dictionary
def countWords(listOfWords):
    """Count how many times each word occurs in listOfWords.

    listOfWords -- container of strings that supports len() and integer
                   indexing from 0 to len() - 1.

    Returns a dictionary mapping each distinct word to its number of
    occurrences.  An empty input gives an empty dictionary.
    """
    words = {}  # Start empty so no word gets a head start.
    # Index by position rather than iterating directly: this accepts a
    # list as well as a dict keyed by 0..n-1 (iterating such a dict
    # would yield the integer keys instead of the words).
    for i in range(len(listOfWords)):
        word = listOfWords[i]
        # First sighting counts from 0; later sightings increment.
        words[word] = words.get(word, 0) + 1
    return words
# printWords(Dictionary)
# return Nothing
def printWords(words):
    """Print every word and its count, one "word = count" line each.

    words -- dictionary mapping word -> count.

    Lines are sorted in ascending order of count; words with the same
    count are ordered alphabetically.  Returns nothing.
    """
    # items() and an index-based key work on both Python 2 and 3
    # (iteritems() and tuple-unpacking lambdas do not exist in Python 3).
    ordered = sorted(words.items(), key=lambda item: (item[1], item[0]))
    for key, value in ordered:
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print("{0} = {1}".format(key, value))
# Main script: count the words of the file named on the command line.
print("Word counter...")
if len(sys.argv) > 1:  # The user provided a file name.
    file2load = sys.argv[1]  # Desired file to load.
    try:
        # 'with' closes the file even if reading it fails.
        with open(file2load) as myfile:
            text = myfile.read()
    except IOError:
        print("Could not load file! '%s' Check the file name." % (file2load))
        # Stop here: there is no text to process, and carrying on would
        # only fail later with an unrelated error.
        sys.exit(1)
    # split() with no argument breaks on any run of whitespace, so words
    # separated by newlines or tabs are not glued together and repeated
    # spaces do not produce empty words.
    book = text.split()
    book = removePuncuation(book)  # Remove punctuation.
    words = countWords(book)  # Count the words.
    printWords(words)  # Print the result.
    print("Total word count = %d" % len(book))
else:
    # Only reached when no file name was given.
    print("No system argument used.")