You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
2.2 KiB
68 lines
2.2 KiB
(ns cipher-analytical-machine.analyzers.analyzers
|
|
(:require [clojure.string :as cs])
|
|
(:gen-class))
|
|
|
|
(defn count-characters
  "Count how many times each element of `text` occurs.

  `text` is any seqable (normally a string, which is walked character by
  character). Every element is counted, not only letters - whitespace and
  punctuation get their own entries.

  Returns a map from element to its number of occurrences; an empty or nil
  `text` yields an empty map."
  [text]
  ;; clojure.core/frequencies is the standard-library form of the
  ;; reduce/assoc/inc fold and produces the same map.
  (frequencies text))
|
|
|
|
(defn reverse-count-characters
  "Invert a character->count map into a count->characters map.

  `count-map` is a collection of [character count] pairs (typically the map
  returned by `count-characters`).

  Returns a map whose keys are the counts and whose values are vectors of
  all characters that share that count, in the order they were encountered
  while walking `count-map`."
  [count-map]
  (reduce
   (fn [groups [ch n]]
     ;; conj on a vector already returns a vector, so the group does not
     ;; need to be re-copied with `into []` on every step.
     (assoc groups n (conj (get groups n []) ch)))
   {}
   count-map))
|
|
|
|
(defn chi-squared-letter-statistic
  "Compute the chi-squared term for a single letter.

  `letter-count`     - how many times the letter was observed.
  `letter-frequence` - the letter's relative frequency; it is multiplied by
                       `length-text` to obtain the expected count.
  `length-text`      - the length of the text the letter was counted in.

  Returns (observed - expected)^2 / expected. The square is computed with
  Math/pow, so the numerator is a double. An expected count of zero is not
  handled here."
  [letter-count letter-frequence length-text]
  (let [expected  (* length-text letter-frequence)
        deviation (- letter-count expected)]
    (/ (Math/pow deviation 2) expected)))
|
|
|
|
(defn chi-squared-statistic
  "Compute the chi-squared statistic of `text` against `letter-frequencies`.
  More detail about the algorithm here: http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/

  `text`               - the string to score. It is lower-cased before its
                         characters are counted, so the keys of
                         `letter-frequencies` must be lower-case characters.
  `letter-frequencies` - a map from character to its relative frequency in
                         the reference language.

  Returns the sum, over every letter of `letter-frequencies`, of
  (observed - expected)^2 / expected, where expected is the letter's
  frequency multiplied by the length of `text`. Letters that never occur in
  `text` are included with an observed count of 0. Characters of `text`
  that have no entry in `letter-frequencies` contribute nothing. An empty
  `text` yields 0."
  [text letter-frequencies]
  (let [;; NOTE(review): the length covers every character of `text`,
        ;; including ones absent from `letter-frequencies` (spaces,
        ;; punctuation) - confirm callers strip those first if needed.
        length-text   (count text)
        letter-counts (count-characters (cs/lower-case text))]
    (reduce
     (fn [acc [letter frequency]]
       (let [observed (get letter-counts letter 0)]
         ;; A letter that is neither observed nor expected has an undefined
         ;; 0/0 term; skip it so that empty text and zero-frequency table
         ;; entries do not trigger a division by zero.
         (if (and (zero? observed)
                  (zero? (* length-text frequency)))
           acc
           (+ acc
              (chi-squared-letter-statistic observed frequency length-text)))))
     0
     letter-frequencies)))
|
|
|
|
(defn sort-map-by-count
  "Turn a character->count map into a sequence of [character count] pairs
  ordered from the highest count to the lowest.

  `count-map` is a collection of [character count] pairs, typically a map.
  Each entry is rebuilt as a plain two-element vector before sorting."
  [count-map]
  (let [pairs (mapv (fn [[ch n]] [ch n]) count-map)]
    ;; Sorting on the negated count yields descending order.
    (sort-by (comp - second) pairs)))
|
|
|
|
(defn table-to-string
  "Build a string from the first element of every row of `table`.

  `table` is a sequence of rows such as [symbol count]; the first elements
  are concatenated with `str` in row order. An empty table yields \"\"."
  [table]
  (let [symbols (map first table)]
    (apply str symbols)))
|
|
|
|
(defn symbol-frequency-table
  "Pair the symbols of a cipher text with the symbols of a reference
  frequency table by rank.

  Both `cipher-table` and `frequency-table` are symbol->count (or
  symbol->frequency) maps. Each is sorted in descending order with
  `sort-map-by-count` and flattened to a string with `table-to-string`;
  the characters of the two strings are then zipped together.

  Returns a map from a cipher character to the reference character of the
  same rank. `zipmap` stops at the shorter of the two strings."
  [cipher-table frequency-table]
  (let [ranked-symbols (comp table-to-string sort-map-by-count)]
    (zipmap (ranked-symbols cipher-table)
            (ranked-symbols frequency-table))))
|
|
|