Mired in code
Artifact [2dda0382ad]
Not logged in
Public Repositories
mwm's Repositories

Artifact 2dda0382ade7c9650a9c327bcd62b629e0030d02:


#!/usr/bin/env clj

(defn mfilter
  "Return a map holding only the entries of mapin for which (pred entry)
  is truthy. pred is called with the whole map entry, so use key / val
  (or destructuring) inside it.

  When mapin is a map the result is built on (empty mapin), so the map
  type is kept: filtering a sorted map gives a sorted map. A nil mapin
  gives an empty map."
  [pred mapin]
  ;; (empty nil) is nil and conj-ing onto nil would build a list, so fall
  ;; back to a plain map for anything that is not itself a map.
  (into (if (map? mapin) (empty mapin) {})
        (filter pred mapin)))

(defn enumerate
  "Lazily pair each element of sequence with its zero-based position,
  yielding [index value] vectors."
  [sequence]
  (map-indexed vector sequence))

(defn add-phrase-to-phrase-dict
  "Record that phrase occurs at phrase-index in the list numbered
  list-index, and return the updated phrase-dict.

  phrase-dict maps phrase -> {list-index -> (phrase-index ...)}. A phrase
  is only recorded when it already has an entry in phrase-dict or when
  list-index is 0; otherwise phrase-dict is returned unchanged. New
  indexes are conj-ed onto the existing collection (a list when the slot
  was empty, so the newest index comes first)."
  [phrase-dict phrase list-index phrase-index]
  (if-not (or (phrase-dict phrase) (= 0 list-index))
    phrase-dict
    (update-in phrase-dict [phrase list-index] conj phrase-index)))
  
(defn add-list-to-phrase-dict
  "Fold every run of three consecutive elements of list into phrase-dict
  and return the result.

  Each run is a vector [a b c]; it is handed to add-phrase-to-phrase-dict
  together with list-index and the zero-based position at which the run
  starts in list. A list with fewer than three elements contributes
  nothing."
  [phrase-dict list-index list]
  (let [triples (map vector list (next list) (nnext list))]
    (reduce (fn [acc [start triple]]
              (add-phrase-to-phrase-dict acc triple list-index start))
            phrase-dict
            (map-indexed vector triples))))

(defn build-phrase-dict
  "Build the dictionary of three-element phrases that occur in every one
  of lists.

  Each list is folded in turn with add-list-to-phrase-dict, starting from
  an empty map. The result keeps only the phrases whose per-list map has
  exactly (count lists) entries, i.e. one entry for each list."
  [lists]
  (let [fold-list      (fn [acc [i words]]
                         (add-list-to-phrase-dict acc i words))
        candidates     (reduce fold-list {} (enumerate lists))
        needed         (count lists)
        in-every-list? (fn [entry] (= needed (count (val entry))))]
    (mfilter in-every-list? candidates)))

(defn remove-phrases-from-list
  "Return a sorted map of position -> element for the elements of list
  that are not covered by any phrase.

  phrase-starts is a collection of zero-based start positions; a start s
  covers positions s, s+1 and s+2. A nil or empty phrase-starts removes
  nothing.

  The result is always a sorted map, so (vals result) yields the
  surviving elements in their original order."
  [phrase-starts list]
  (let [covered? (fn [[pos _]]
                   (some #(and (>= pos %) (< pos (+ % 3))) phrase-starts))]
    ;; Filter straight into a sorted map so the position ordering cannot
    ;; be lost by an intermediate unordered map.
    (into (sorted-map)
          (remove covered? (map-indexed vector list)))))

(defn remove-shared-phrases
  "For each list in lists, print on its own line the elements that remain
  after removing every three-element phrase found by build-phrase-dict
  (the phrases present in all of lists). Returns nil."
  [lists]
  ;; Collapse the per-phrase {list-index -> starts} maps into one map of
  ;; list-index -> all phrase start positions in that list.
  (let [phrases (apply merge-with concat (vals (build-phrase-dict lists)))]
    (doseq [[idx list] (enumerate lists)]
      ;; merge-with returns nil when there are no shared phrases; `get`
      ;; tolerates that, whereas calling nil as a function would throw.
      (println (vals (remove-phrases-from-list (get phrases idx) list))))))


;; Sample input: three word lists, each of which contains the run
;; "the" "phrase" "dictionary".
(def mp [["As" "input" "we" "get" "the" "phrase" "dictionary" "we're" "going"]
	 ["to" "update" "the" "phrase" "dictionary" "input" "list" "in" "the"]
	 ["list" "of" "lists" "and" "the" "phrase" "dictionary" "Wanting" "to"]])


;; Script entry point: runs when the file is loaded and prints one line
;; per list in mp.
(remove-shared-phrases mp)