Script to extract highlights from the Kindle
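I put together a small script to extract highlights from the Kindle and put them into a separate file for each title. The script reads `clippings.txt` from the project root. Keep in mind that the code expects this file not to end with an empty line, i.e. there must be no trailing newline after the last `==========` separator. Create a `highlights`
directory in the project root; this is where the generated files will be stored.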
#clip.py
class Clip:
    """A single highlight: the title it belongs to and the highlighted text."""

    def __init__(self, title, text):
        """Store the title and the highlighted text unchanged."""
        self.text = text
        self.title = title

    def __str__(self):
        """Return the title and the text separated by a single space."""
        return " ".join((self.title, self.text))

    def __repr__(self):
        """Use the same rendering as ``__str__``."""
        return str(self)
# parse.py
import os
from os import walk
from clip import Clip
def parse_and_generate_notes(clippings_path="clippings.txt",
                             output_dir="highlights"):
    """Split a Kindle clippings file into one Markdown file per title.

    The clippings file is cut into entries at every ``==========`` line.
    Within an entry the first line is taken as the title and the fourth
    line as the highlight text; the second and third lines are ignored.
    All highlight texts of a title are written, separated by a blank line,
    to ``<output_dir>/<title>.md``, where ``/`` and spaces in the title are
    replaced by ``_`` and parentheses are removed. Entries with an empty
    title go to ``untitled.md``. If a file already exists, the new
    highlights are appended after a blank line.

    Args:
        clippings_path: Path of the clippings file to read.
        output_dir: Directory receiving the per-title files; it is created
            if it does not exist yet.

    Raises:
        IndexError: If a non-blank entry has fewer than four lines.
    """
    entry_separator = "==========\n"
    # One-pass equivalent of replacing "/" and " " with "_" and dropping
    # "(" and ")" when turning a title into a file name.
    filename_table = str.maketrans({"/": "_", " ": "_", "(": None, ")": None})

    # "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark, so a
    # BOM at the start of the file cannot end up inside the first title.
    with open(clippings_path, "r", encoding="utf-8-sig") as data:
        highlights = data.read().split(entry_separator)

    # Group the clips by title in a single pass, keeping file order.
    clips_by_books = {}
    for highlight in highlights:
        if not highlight.strip():
            # A file that ends with the separator (plus newline) leaves an
            # empty chunk behind; there is nothing to extract from it.
            continue
        tokens = highlight.split("\n")
        clip = Clip(tokens[0], tokens[3].strip())
        clips_by_books.setdefault(clip.title, []).append(clip)

    os.makedirs(output_dir, exist_ok=True)

    for title, book_clips in clips_by_books.items():
        if title == "":
            basename = "untitled.md"
        else:
            basename = title.translate(filename_table) + ".md"
        filename = os.path.join(output_dir, basename)

        # When appending to a file that already holds highlights, start with
        # a blank line so the first new highlight is not glued onto the
        # last existing one.
        needs_separator = (
            os.path.exists(filename) and os.path.getsize(filename) > 0
        )
        # Mode "a" creates the file when it is missing and appends otherwise.
        with open(filename, "a", encoding="utf-8") as file:
            if needs_separator:
                file.write("\n\n")
            file.write("\n\n".join(clip.text for clip in book_clips))
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    parse_and_generate_notes()