#!/usr/bin/env clj
(defn mfilter
  "Return a map holding only the entries of mapin for which (pred entry)
  returns logical true.

  pred receives the whole map entry, not just its key; use key/val or
  destructuring inside pred to inspect it.

  When mapin is a sorted map the result is a sorted map with the same
  comparator, so the key order of the input survives the filtering. Any
  other input produces an ordinary (unordered) map."
  [pred mapin]
  ;; Rebuilding through hash-map would rehash a sorted map and scramble the
  ;; order seen by keys/vals, so start from an empty map of the same sorted
  ;; kind when there is one.
  (let [target (if (and (map? mapin) (sorted? mapin))
                 (empty mapin)
                 {})]
    (into target (filter pred mapin))))
(defn enumerate
  "Lazily pair every element of sequence with its zero-based position,
  yielding [index value] vectors in order."
  [sequence]
  (map-indexed vector sequence))
(defn add-phrase-to-phrase-dict
  "Record that phrase occurs at position phrase-index of list number
  list-index.

  phrase-dict maps phrase -> {list-index -> positions}. A phrase is only
  recorded when it is already present in phrase-dict or when it comes from
  list 0; otherwise phrase-dict is returned unchanged."
  [phrase-dict phrase list-index phrase-index]
  (let [already-known? (phrase-dict phrase)
        from-first-list? (= list-index 0)]
    (if-not (or already-known? from-first-list?)
      phrase-dict
      (update-in phrase-dict [phrase list-index] conj phrase-index))))
(defn add-list-to-phrase-dict
  "Feed every run of three consecutive elements of list into phrase-dict
  via add-phrase-to-phrase-dict, tagging each run with list-index and the
  position at which the run starts. Returns the updated phrase-dict."
  [phrase-dict list-index list]
  (loop [acc phrase-dict
         start 0
         ;; Each phrase is a vector of three neighbouring elements.
         remaining (seq (map vector list (rest list) (nthnext list 2)))]
    (if remaining
      (recur (add-phrase-to-phrase-dict acc (first remaining) list-index start)
             (inc start)
             (next remaining))
      acc)))
(defn build-phrase-dict
  "Build a map of phrase -> {list-index -> positions} from lists, keeping
  only the phrases whose per-list map has an entry for every list."
  [lists]
  (let [list-count (count lists)
        in-every-list? (fn [entry] (= (count (val entry)) list-count))
        add-one-list (fn [acc [list-index words]]
                       (add-list-to-phrase-dict acc list-index words))]
    (->> (enumerate lists)
         (reduce add-one-list {})
         (mfilter in-every-list?))))
(defn remove-phrases-from-list
  "Return a sorted map of index -> element for the elements of list that do
  not fall inside any three-element phrase.

  phrase-starts is a collection of starting indices (may be nil or empty);
  each start s covers indices s, s+1 and s+2. Because the result is a
  sorted map, (vals result) yields the surviving elements in their
  original list order."
  [phrase-starts list]
  (let [;; Precompute every covered index once instead of rescanning
        ;; phrase-starts for each element.
        covered (set (mapcat #(range % (+ % 3)) phrase-starts))]
    ;; Build the sorted map directly from the kept entries so ordering is
    ;; never lost by passing through an unordered map.
    (into (sorted-map)
          (remove (fn [[idx _]] (contains? covered idx))
                  (map-indexed vector list)))))
(defn remove-shared-phrases
  "For each list in lists, print the elements that remain after removing
  every three-element phrase that build-phrase-dict reports as shared.
  One line is printed per list; returns nil."
  [lists]
  (let [;; Merge the per-phrase {list-index -> positions} maps into a single
        ;; {list-index -> all phrase start positions} map. This is nil when
        ;; there are no shared phrases at all.
        phrases (apply merge-with concat (vals (build-phrase-dict lists)))]
    (doseq [[idx list] (enumerate lists)]
      ;; get tolerates a nil phrases map, whereas invoking nil as a
      ;; function would throw a NullPointerException.
      (println (vals (remove-phrases-from-list (get phrases idx) list))))))
;; Sample input: three word lists, each of which contains the consecutive
;; words "the" "phrase" "dictionary".
(def mp [["As" "input" "we" "get" "the" "phrase" "dictionary" "we're" "going"]
["to" "update" "the" "phrase" "dictionary" "input" "list" "in" "the"]
["list" "of" "lists" "and" "the" "phrase" "dictionary" "Wanting" "to"]])
;; Run the script on the sample input; prints one line per list.
(remove-shared-phrases mp)