In [ ]:
import re

def tokenizer(text):
    """Split lines of text into lowercase, alphabetic-only tokens."""
    tokens = []
    for line in text:
        for word in line.split(' '):
            # Strip out every non-alphabetic character
            word = re.sub("[^A-Za-z]", "", word)
            # Keep only tokens longer than one character
            if len(word) > 1:
                tokens.append(word.lower())

    return tokens

def remove_stopwords(input_words):
    """Filter out tokens that appear in the stop-word list."""
    # read_txt is expected to return the lines of the file; a set makes
    # the membership test below fast for long token lists
    stopword_list = set(tokenizer(read_txt('data/stop_words.txt')))
    output_words = [word for word in input_words if word not in stopword_list]
    return output_words
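
A quick usage sketch of the two functions together. The sample lines below are made up for illustration, and it assumes `read_txt` returns a list of lines and that `data/stop_words.txt` is available next to the notebook, as in the code above.

In [ ]:
# Hypothetical input: two raw lines of text
sample_lines = [
    "The quick, brown fox jumps over 2 lazy dogs!",
    "It was the best of times...",
]

tokens = tokenizer(sample_lines)
# tokens now holds lowercase alphabetic words, e.g. ['the', 'quick', 'brown', ...]

filtered = remove_stopwords(tokens)
# filtered keeps only the tokens that are not in data/stop_words.txt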