"""Build an HTML data page listing the top speakers of each DS9 script.

The script reads an episode list and a directory of script text files,
counts how often each speaker heading occurs in every script, and writes
one list item per script whose attributes carry the three largest counts.

NOTE(review): in the copy of this file that was reviewed, the formatting
was collapsed and every HTML string literal had lost its markup. The code
logic below is taken from the surviving text; the markup constants are
placeholders and must be restored from the original file.
"""
from os import listdir
import operator
import os

# Default locations used by the original script.
EPISODE_LIST_PATH = r"c:\ds9\eplist.txt"
SCRIPT_DIR = "c:/ds9/Scripts - DS9/"
OUTPUT_PATH = "c:/ds9/ds9_data.html"

# A counted heading starts with exactly five tabs; a sixth tab disqualifies it.
_SPEAKER_INDENT = "\t" * 5

# Characters removed from speaker names before they are written to the page.
_NAME_PUNCTUATION = "'-./()"

# NOTE(review): placeholders for markup that was stripped from the reviewed
# copy. The list tags are inferred from list bullets that survived in place
# of the original markup; nothing of the page head, the buttons block or the
# closing markup survived except whitespace. Restore all of them from the
# original file before relying on the generated page.
LIST_OPEN = "\n<ul>"
ITEM_OPEN = "<li "
ITEM_CLOSE = "</li>\n"
LIST_CLOSE = "</ul>\n\n"
BUTTONS = "\n"
HEAD = "\n\n\n"
TAIL = ""


def get_episode_info(path=EPISODE_LIST_PATH):
    """Parse the episode list into a dict keyed by ``400 + episode number``.

    Each non-blank line is split on whitespace and read as:
    field 0 = episode number, field 1 = ``season-episode`` code,
    field 2 = ignored, field 3 = air date, fields 4.. = title words.

    Args:
        path: Location of the episode list file. Defaults to the path the
            original script used.

    Returns:
        A dict mapping ``400 + number`` to ``[code, airdate, title]`` where
        ``code`` has the form ``"S0<season>E<episode>"`` and ``title`` is the
        remaining words joined by single spaces.

    Raises:
        ValueError: If field 0 of a line is not an integer.
        IndexError: If a non-blank line has fewer than four fields or its
            code field contains no ``-``.
    """
    episodes = {}
    with open(path) as ep_file:
        for episode in ep_file:
            spl = episode.split()
            if not spl:
                # Blank lines (e.g. a trailing newline) carry no episode.
                continue
            number = int(spl[0])
            code_parts = spl[1].split("-")
            code = "S0" + code_parts[0] + "E" + code_parts[1]
            airdate = spl[3]
            title = " ".join(spl[4:])
            episodes[400 + number] = [code, airdate, title]
    return episodes


def get_top_lines(script, top_speakers, script_dir=SCRIPT_DIR):
    """Return the most frequent speaker headings in one script file.

    A line is counted when it is longer than six characters, starts with
    exactly five tab characters, and its sixth character is not a tab. The
    stripped line is used as the speaker name.

    Args:
        script: File name of the script inside ``script_dir``.
        top_speakers: Maximum number of ``(name, count)`` pairs to return.
        script_dir: Directory holding the script files. Defaults to the
            directory the original script used.

    Returns:
        A list of ``(name, count)`` tuples sorted by descending count,
        truncated to ``top_speakers`` entries.
    """
    dialog = {}
    # Plain text mode replaces the Python-2-only "rU" flag; Python 3 applies
    # universal newline handling by default.
    with open(os.path.join(script_dir, script)) as ds9script:
        for line in ds9script:
            if len(line) <= 6:
                continue
            if line[:5] == _SPEAKER_INDENT and line[5] != "\t":
                char = line.strip()
                dialog[char] = dialog.get(char, 0) + 1
    sorted_dialog = sorted(
        dialog.items(), key=operator.itemgetter(1), reverse=True
    )
    return sorted_dialog[:top_speakers]


def _strip_punctuation(name):
    """Return ``name`` without any of the characters in _NAME_PUNCTUATION."""
    return "".join(ch for ch in name if ch not in _NAME_PUNCTUATION)


def main():
    """Generate the data page for every script and write it to OUTPUT_PATH."""
    # NOTE(review): episode_info, number, li and chars feed nothing in the
    # surviving code. Presumably the lost markup (or removed code) consumed
    # them; they are kept so the script still reads the episode list and
    # still rejects non-numeric script file names. Confirm before removing.
    episode_info = get_episode_info()
    scripts = sorted(listdir(SCRIPT_DIR))
    chars = set()

    parts = [LIST_OPEN]
    for script in scripts:
        li = ""
        number = int(script.split(".")[0])
        top_lines = get_top_lines(script, 3)
        attributes = []
        for name, count in top_lines:
            clean_name = _strip_punctuation(name)
            attributes.append(clean_name + "=\"" + str(count) + "\" ")
            chars.add(clean_name)
        parts.append(ITEM_OPEN + "".join(attributes) + ">" + li + ITEM_CLOSE)
    parts.append(LIST_CLOSE)

    page = HEAD + "".join(parts) + BUTTONS + TAIL
    with open(OUTPUT_PATH, "w") as data_out:
        data_out.write(page)


if __name__ == "__main__":
    main()