Add Chi-squared Statistic function to analyze texts.
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
parent
4750242a1d
commit
79e2957402
@ -0,0 +1,38 @@
|
|||||||
|
(ns cipher-analytical-machine.cipher_analyzers
|
||||||
|
(:require [clojure.string :as cs])
|
||||||
|
(:gen-class))
|
||||||
|
|
||||||
|
(defn count-characters
  "Count how often each character occurs in `text`.

  Returns a map from character to its number of occurrences. Every
  character is counted, not only letters. An empty or nil `text` yields
  an empty map."
  [text]
  ;; `frequencies` is the core-library form of the hand-written
  ;; assoc/inc fold and produces the same map.
  (frequencies text))
|
||||||
|
|
||||||
|
(defn chi-squared-letter-statistic
  "Chi-squared contribution of a single letter.

  `letter-count` is the observed number of occurrences of the letter,
  `letter-frequence` the relative frequency it is expected to have, and
  `length-text` the length of the analysed text. The expected count is
  length-text * letter-frequence, and the result is
  (observed - expected)^2 / expected."
  [letter-count letter-frequence length-text]
  (let [expected  (* length-text letter-frequence)
        deviation (- letter-count expected)]
    (-> deviation
        (Math/pow 2)
        (/ expected))))
|
||||||
|
|
||||||
|
(defn chi-squared-statistic
  "Chi-squared statistic of `text` against the expected distribution
  `letter-frequences`. More detail about the algorithm here:
  http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/

  `text` is lower-cased before counting, so `letter-frequences` must be a
  map from lower-case character to its expected relative frequency.
  Characters of `text` that are not keys of `letter-frequences` are
  ignored, but they still count towards the text length used to compute
  the expected counts.

  The sum runs over every entry of `letter-frequences`, so a letter that
  never occurs in `text` contributes with an observed count of 0.

  Returns 0 for an empty (or nil) `text` or an empty table, otherwise the
  sum of the per-letter contributions."
  [text letter-frequences]
  (let [length-text (count text)]
    (if (zero? length-text)
      ;; Every expected count would be zero, so there is nothing to compare.
      0
      (let [letter-counts (count-characters (cs/lower-case text))]
        (reduce-kv
          (fn [acc letter frequence]
            (let [observed (get letter-counts letter 0)]
              (if (and (zero? observed) (zero? frequence))
                ;; Expected and observed counts are both zero: skip the
                ;; entry rather than divide by a zero expected count.
                acc
                (+ acc
                   (chi-squared-letter-statistic observed
                                                 frequence
                                                 length-text)))))
          0
          letter-frequences)))))
|
||||||
|
|
Loading…
Reference in new issue