Script to extract highlights from the Kindle

June 21, 2020

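I put together a small script that extracts highlights from a Kindle clippings file (`clippings.txt`) and writes them into a separate file for each title. Keep in mind that the code expects the clippings file to end right after the last `==========` separator, with no trailing newline or empty line; otherwise the parser fails on the empty final entry. Before running it, create a `highlights` directory in the project root, which is where the files will be stored.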

# clip.py

class Clip:
    """A single clipping: the title it belongs to and its text."""

    def __init__(self, title, text):
        """Store the clipping's title and text unchanged."""
        self.title = title
        self.text = text

    def __str__(self):
        """Return the title and the text separated by one space."""
        return " ".join((self.title, self.text))

    def __repr__(self):
        """Use the same rendering as ``str()``."""
        return str(self)

# parse.py
import os
from os import walk
from clip import Clip

# Line that terminates every entry in the clippings file.
_ENTRY_SEPARATOR = "=========="

# Filename sanitising: "/" and " " become "_", parentheses are dropped.
_FILENAME_TABLE = str.maketrans("/ ", "__", "()")


def _parse_entries(raw):
    """Return ``(title, text)`` pairs for every usable entry in *raw*.

    An entry is the run of lines up to a separator line. Line 0 is the
    title and everything from line 3 onwards is the text; lines 1-2 are
    ignored (presumably the metadata line and a blank line in Kindle
    exports). Entries with fewer than four lines or an empty text are
    skipped rather than raising.
    """
    entries = []
    current = []
    for line in raw.split("\n"):
        if line.strip() == _ENTRY_SEPARATOR:
            entries.append(current)
            current = []
        else:
            current.append(line)
    # Whatever follows the last separator (often just "") is kept as a
    # candidate too; the length check below discards it when it is junk.
    entries.append(current)

    pairs = []
    for lines in entries:
        if len(lines) < 4:
            continue
        text = "\n".join(lines[3:]).strip()
        if not text:
            continue
        # A stray BOM in front of a title would otherwise end up in the
        # title and in the generated filename.
        pairs.append((lines[0].lstrip("\ufeff"), text))
    return pairs


def _note_filename(title):
    """Return the ``.md`` filename used for *title*."""
    if title == "":
        return "untitled.md"
    return title.translate(_FILENAME_TABLE) + ".md"


def parse_and_generate_notes(clippings_path="clippings.txt", output_dir="highlights"):
    """Split a clippings file into one Markdown file per title.

    Args:
        clippings_path: Path of the clippings file to read.
        output_dir: Directory that receives the ``<title>.md`` files. It is
            created if it does not exist.

    Highlights of one title are written in file order, separated by a blank
    line. If the title's file already exists, the new highlights are
    appended after a blank line. Running the function twice on the same
    input therefore duplicates the highlights.

    Raises:
        OSError: If the clippings file cannot be read or an output file
            cannot be written.
        UnicodeDecodeError: If the clippings file is not valid UTF-8.
    """
    # "utf-8-sig" drops a leading byte-order mark if there is one.
    # NOTE(review): assumes the export is UTF-8 - confirm for your device.
    with open(clippings_path, "r", encoding="utf-8-sig") as data:
        raw = data.read()

    # Group in a single pass instead of rescanning all clips per title.
    texts_by_title = {}
    for title, text in _parse_entries(raw):
        texts_by_title.setdefault(title, []).append(text)

    os.makedirs(output_dir, exist_ok=True)

    for title, texts in texts_by_title.items():
        path = os.path.join(output_dir, _note_filename(title))
        # Without a separator the old last highlight and the new first one
        # would be fused into a single paragraph.
        has_content = os.path.exists(path) and os.path.getsize(path) > 0
        with open(path, "a", encoding="utf-8") as file:
            if has_content:
                file.write("\n\n")
            file.write("\n\n".join(texts))

if __name__ == "__main__":
    parse_and_generate_notes()