diff --git a/src/cipher_analytical_machine/cipher_analyzers.clj b/src/cipher_analytical_machine/cipher_analyzers.clj
new file mode 100644
index 0000000..548d13e
--- /dev/null
+++ b/src/cipher_analytical_machine/cipher_analyzers.clj
@@ -0,0 +1,57 @@
+(ns cipher-analytical-machine.cipher_analyzers
+  (:require [clojure.string :as cs])
+  (:gen-class))
+
+(defn count-characters
+  "Return a map from each element of `text` (each character, when `text`
+  is a string) to the number of times it occurs. An empty or nil `text`
+  yields an empty map."
+  [text]
+  (frequencies text))
+
+(defn chi-squared-letter-statistic
+  "Chi-squared contribution of one letter:
+  (observed - expected)^2 / expected, where observed is `letter-count`
+  and expected is `length-text` * `letter-frequence`. The numerator is
+  computed with Math/pow and is therefore a double. The expected count
+  must be non-zero; this function does not guard against dividing by
+  zero."
+  [letter-count letter-frequence length-text]
+  (let [expected (* length-text letter-frequence)]
+    (/ (Math/pow (- letter-count expected) 2)
+       expected)))
+
+(defn chi-squared-statistic
+  "Chi-squared statistic of `text` against `letter-frequences`, a map from
+  character to expected relative frequency. `text` is lower-cased before
+  counting, so the keys should be lower-case characters. The sum runs
+  over every entry of `letter-frequences`: a letter that never occurs in
+  `text` still contributes its expected count. Characters of `text` that
+  are not keys of `letter-frequences` are ignored. Returns 0 for an empty
+  or nil `text`. More detail about the algorithm here:
+  http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/"
+  [text letter-frequences]
+  ;; NOTE(review): length-text counts every character of `text`, including
+  ;; ones that are not keys of `letter-frequences` (spaces, punctuation);
+  ;; confirm callers strip those first if expected counts should be based
+  ;; on letters only.
+  (let [length-text (count text)]
+    (if (zero? length-text)
+      ;; No observations: return 0 and avoid dividing by an expected
+      ;; count of zero.
+      0
+      (let [letter-counts (count-characters (cs/lower-case text))]
+        (reduce
+          (fn [total [letter frequence]]
+            (let [observed (get letter-counts letter 0)]
+              (if (and (zero? observed) (zero? frequence))
+                ;; Observed and expected are both 0: nothing to add, and
+                ;; the letter statistic would divide by zero.
+                total
+                (+ total
+                   (chi-squared-letter-statistic observed
+                                                 frequence
+                                                 length-text)))))
+          0
+          letter-frequences)))))
+