(ns cipher-analytical-machine.analyzers.analyzers
  (:require [clojure.string :as cs])
  (:gen-class))

(defn count-characters
  "Count how many times each character occurs in `text`.
  Returns a map of character -> occurrence count; an empty text yields {}."
  [text]
  (frequencies text))

(defn reverse-count-characters
  "Invert a map of symbol -> count into a map of count -> vector of symbols.
  Symbols that share a count are collected into the same vector, in the
  order in which `count-map` yields them."
  [count-map]
  (reduce (fn [acc [sym n]]
            (assoc acc n (conj (get acc n []) sym)))
          {}
          count-map))

(defn chi-squared-letter-statistic
  "Chi-squared contribution of a single letter:
  (observed - expected)^2 / expected.
  `letter-count` is the observed number of occurrences; the expected number
  is `letter-frequence` multiplied by `length-text`."
  [letter-count letter-frequence length-text]
  (let [expected (* length-text letter-frequence)]
    (/ (Math/pow (- letter-count expected) 2)
       expected)))

(defn chi-squared-statistic
  "Chi-squared statistic of `text` against `letter-frequencies`, a map of
  lower-case character -> relative frequency. The text is lower-cased before
  counting. Characters of the text that are not keys of `letter-frequencies`
  add nothing to the sum, but they are still included in the text length used
  to compute expected counts. Returns 0 for an empty text.
  More detail about the algorithm here:
  http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/"
  [text letter-frequencies]
  (let [length-text (count text)
        letter-counts (count-characters (cs/lower-case text))]
    (if (zero? length-text)
      0
      ;; Sum over every letter of the frequency table, not only over the
      ;; letters that occur in the text: a letter that is expected but never
      ;; seen must still add its term to the statistic.
      (reduce (fn [acc [letter frequency]]
                (let [observed (get letter-counts letter 0)]
                  (+ acc
                     (if (zero? observed)
                       ;; (0 - E)^2 / E simplifies to E. Using it directly also
                       ;; avoids dividing by zero for a table entry whose
                       ;; frequency is 0 and which does not occur in the text.
                       (* length-text frequency)
                       (chi-squared-letter-statistic observed frequency length-text)))))
              0
              letter-frequencies))))

(defn sort-map-by-count
  "Return a sequence of [character count] pairs taken from `count-map`,
  ordered by count, descending. Pairs with equal counts keep the relative
  order in which `count-map` yields them (the sort is stable)."
  [count-map]
  (sort-by (comp - second) count-map))

(defn table-to-string
  "Concatenate the first element of every pair in `table` into one string."
  [table]
  (apply str (map first table)))

(defn symbol-frequency-table
  "Return a map from each symbol of the cipher text to a symbol of the
  frequency table. Both tables are sorted by descending count and paired
  position by position, so the most frequent cipher symbol maps to the most
  frequent symbol of `frequency-table`, and so on."
  [cipher-table frequency-table]
  (zipmap (table-to-string (sort-map-by-count cipher-table))
          (table-to-string (sort-map-by-count frequency-table))))