Add Chi-squared Statistic function to analyze texts.
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
parent
4750242a1d
commit
79e2957402
@ -0,0 +1,38 @@
|
||||
(ns cipher-analytical-machine.cipher_analyzers
|
||||
(:require [clojure.string :as cs])
|
||||
(:gen-class))
|
||||
|
||||
(defn count-characters
  "Return a map from each distinct character of `text` to the number of
  times it occurs. Every character is counted, not only letters; an empty
  or nil `text` yields an empty map."
  [text]
  ;; `frequencies` performs the same fold as a manual reduce/assoc/inc.
  (frequencies text))
|
||||
|
||||
(defn chi-squared-letter-statistic
  "Compute the chi-squared term of a single letter: (observed - expected)^2 / expected.

  `letter-count`     - observed number of occurrences of the letter.
  `letter-frequence` - relative frequency of the letter; it is multiplied by
                       `length-text` to obtain the expected count.
  `length-text`      - length of the analysed text."
  [letter-count letter-frequence length-text]
  (let [expected  (* length-text letter-frequence)
        deviation (- letter-count expected)]
    ;; Math/pow always yields a double, so the numerator is a double.
    (/ (Math/pow deviation 2) expected)))
|
||||
|
||||
(defn chi-squared-statistic
  "Compute the chi-squared statistic of `text` against `letter-frequences`.
  More detail about the algorithm here:
  http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/

  `text`              - string to analyse; it is lower-cased before counting.
  `letter-frequences` - map from character to relative frequency. Keys are
                        looked up against the lower-cased text, so they are
                        presumably lower-case characters (verify against
                        callers).

  Returns the sum, over every entry of `letter-frequences`, of
  (observed - expected)^2 / expected, where expected is the frequency
  multiplied by the text length. Characters of the text that are absent
  from `letter-frequences` are ignored. Returns 0 for an empty text."
  [text letter-frequences]
  (let [;; NOTE(review): the expected counts scale with the full text length,
        ;; including characters that are not in `letter-frequences` (spaces,
        ;; punctuation, ...) - confirm this is intended.
        length-text (count text)
        letter-counts (count-characters (cs/lower-case text))]
    (reduce
      (fn [total [letter frequency]]
        ;; Iterate over the frequency table, not over the characters found in
        ;; the text: a letter that never occurs still contributes its
        ;; (0 - expected)^2 / expected term to the statistic.
        (let [observed (get letter-counts letter 0)]
          (if (and (zero? observed)
                   (zero? (* length-text frequency)))
            ;; Nothing observed and nothing expected (empty text or a zero
            ;; frequency): the term is taken as 0 instead of dividing by zero.
            total
            (+ total
               (chi-squared-letter-statistic observed frequency length-text)))))
      0
      letter-frequences)))
|
||||
|
Loading…
Reference in new issue