In [47]:
# Line fitting to TTRs (type-token ratios)
#
# For every book of every author, fit a straight line (ordinary least
# squares) to log(type size) against log(1-based position in the series) and
# draw the data together with the fit in that book's own subplot.
# A slope m and intercept c in log-log space correspond to the power law
# type_size ~= exp(c) * position ** m.
# NOTE(review): presumably element j of each `type_size` series is the number
# of distinct types seen after j + 1 tokens -- confirm against the cell that
# builds `type_size_per_book`.

# Derive the subplot grid from the data instead of assuming 3 authors with
# 3 books each: one row per author, one column per book.  For the 3 x 3 case
# this yields the same (12, 9) figure and the same subplot indices as before.
n_rows = max(len(author_corpus), 1)
n_cols = max(
    [len(author_corpus[author]['type_size_per_book'])
     for author in author_corpus] + [1]
)

plt.figure(figsize=(4 * n_cols, 3 * n_rows))
for i, author in enumerate(author_corpus):
    type_size_per_book = author_corpus[author]['type_size_per_book']
    for k, type_size in enumerate(type_size_per_book):
        type_size_log = np.log(type_size)

        # Linear least-squares: solve A @ [m, c] ~= y, where the columns of
        # A are x and a constant 1 (the intercept term).
        x = np.log(np.arange(len(type_size)) + 1)
        A = np.vstack([x, np.ones(len(x))]).T
        y = type_size_log
        m, c = np.linalg.lstsq(A, y, rcond=None)[0]

        # Row i holds author i; column k holds that author's k-th book
        # (subplot indices are 1-based and run row by row).
        plt.subplot(n_rows, n_cols, n_cols * i + (k + 1))
        plt.plot(x, y, 'o', label='actual')
        plt.plot(x, m*x+c, label='linear fit')
        book_name = text_info['authors'][author][k].replace('_', ' ')
        plt.title('%s (%s)' % (book_name.title(), author.title()))
        plt.xlabel('log(token size) [m=%.2f, c=%.2f]' % (m, c))
        plt.ylabel('log(type size)')
        plt.legend()
        plt.grid()

# Lay out and display the whole figure once, after every subplot is drawn.
plt.tight_layout()
plt.show()