You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
cipher-analytical-machine/src/cipher_analytical_machine/analyzers/analyzers.clj

68 lines
2.2 KiB

(ns cipher-analytical-machine.analyzers.analyzers
(:require [clojure.string :as cs])
(:gen-class))
(defn count-characters
  "Tally how many times each element of `text` occurs.

  Returns a map from element (a character when `text` is a string) to the
  number of times it appears. An empty or nil `text` yields an empty map."
  [text]
  (frequencies text))
(defn reverse-count-characters
  "Invert a symbol->count map into a count->symbols map.

  Every distinct count becomes a key whose value is a vector holding all
  symbols that occurred that many times, in the order they are encountered
  while walking `count-map`."
  [count-map]
  (reduce
    (fn [grouped [sym n]]
      ;; Start a fresh vector the first time a given count is seen.
      (update grouped n (fnil conj []) sym))
    {}
    count-map))
(defn chi-squared-letter-statistic
  "Compute one letter's term of the chi-squared statistic.

  The expected count is `length-text` multiplied by `letter-frequence`; the
  result is (observed - expected)^2 / expected, where observed is
  `letter-count`. The squaring goes through Math/pow, so the result is a
  double."
  [letter-count letter-frequence length-text]
  (let [expected  (* length-text letter-frequence)
        deviation (- letter-count expected)]
    (/ (Math/pow deviation 2) expected)))
(defn chi-squared-statistic
  "Compute the chi-squared statistic of `text` against `letter-frequencies`.

  `letter-frequencies` is a map from character to its relative frequency in
  the reference language. The text is lower-cased before counting, and the
  expected count of a letter is its frequency multiplied by the total length
  of `text`. Characters of the text that are missing from the table are
  ignored.

  The sum runs over every letter of the table, so a letter that never occurs
  in the text still contributes its expected count (observed = 0). Terms whose
  observed and expected counts are both zero are skipped, and an empty text
  yields 0.

  More detail about the algorithm here:
  http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/"
  [text letter-frequencies]
  (let [length-text   (count text)
        letter-counts (count-characters (cs/lower-case text))]
    (if (zero? length-text)
      ;; Nothing was observed and nothing is expected; avoid 0/0 terms.
      0
      (reduce
        (fn [acc [letter frequency]]
          (let [observed (get letter-counts letter 0)]
            (if (and (zero? observed) (zero? frequency))
              ;; 0 observed against 0 expected would be NaN; it adds nothing.
              acc
              (+ acc
                 (chi-squared-letter-statistic observed
                                               frequency
                                               length-text)))))
        0
        letter-frequencies))))
(defn sort-map-by-count
  "Turn a symbol->count map into a sequence of [symbol count] pairs ordered
  from the highest count to the lowest."
  [count-map]
  (let [pairs (mapv (fn [[sym n]] [sym n]) count-map)]
    ;; Sorting on the negated count gives descending order.
    (sort-by (comp - second) pairs)))
(defn table-to-string
  "Concatenate the first element of every entry of `table` into one string.

  For a frequency table of [symbol count] pairs this yields the symbols in
  table order."
  [table]
  (cs/join (map first table)))
(defn symbol-frequency-table
  "Build a substitution map from cipher symbols to reference symbols by rank.

  Both tables are sorted by descending count and flattened to strings of
  their symbols; the i-th character of the cipher string is then mapped to
  the i-th character of the reference string. Pairing stops at the end of the
  shorter string."
  [cipher-table frequency-table]
  (let [cipher-ranked    (table-to-string (sort-map-by-count cipher-table))
        reference-ranked (table-to-string (sort-map-by-count frequency-table))]
    (zipmap cipher-ranked reference-ranked)))