In [47]:
# Line fitting to TTRs (type-token ratios)
#
# For every book of every author, fit a straight line y = m*x + c by linear
# least squares, where y = log(type size) and x = log(1-based sample index),
# then draw one panel per book: one grid row per author, one column per book.
#
# Reads `author_corpus[author]['type_size_per_book']` and
# `text_info['authors'][author][k]` (the k-th book name), both defined in
# earlier cells.
# NOTE(review): the x-axis is labelled "token size" but is computed from the
# sample index, so this presumably assumes one type-size entry per token step
# -- confirm against the cell that builds `type_size_per_book`.

# Size the grid from the data instead of hard-coding 3x3, so that more than
# three authors (or more than three books per author) neither raises nor
# makes panels from different authors overwrite each other.
n_rows = max(len(author_corpus), 1)
n_cols = max(
    (len(author_corpus[author]['type_size_per_book']) for author in author_corpus),
    default=1,
)
n_cols = max(n_cols, 1)

# 4x3 inches per panel reproduces the original (12, 9) figure for a 3x3 grid.
fig = plt.figure(figsize=(4 * n_cols, 3 * n_rows))

for i, author in enumerate(author_corpus):
    type_size_per_book = author_corpus[author]['type_size_per_book']

    for k, type_size in enumerate(type_size_per_book):
        type_size_log = np.log(type_size)

        # Linear least-squares: solve A @ [m, c] ~= y with A = [x, 1].
        x = np.log(np.arange(len(type_size)) + 1)  # +1 avoids log(0)
        A = np.vstack([x, np.ones(len(x))]).T
        y = type_size_log
        m, c = np.linalg.lstsq(A, y, rcond=None)[0]

        # Row i holds author i; column k holds that author's k-th book.
        # Axes are created only for books that exist, so shorter rows simply
        # leave the remaining grid cells blank.
        ax = fig.add_subplot(n_rows, n_cols, n_cols * i + (k + 1))
        ax.plot(x, y, 'o', label='actual')
        ax.plot(x, m * x + c, label='linear fit')
        book_name = text_info['authors'][author][k].replace('_', ' ')
        ax.set_title('%s (%s)' % (book_name.title(), author.title()))
        ax.set_xlabel('log(token size) [m=%.2f, c=%.2f]' % (m, c))
        ax.set_ylabel('log(type size)')
        ax.legend()
        ax.grid(True)

fig.tight_layout()
plt.show()
No description has been provided for this image