diff --git a/src/cipher_analytical_machine/analyzers/analyzers.clj b/src/cipher_analytical_machine/analyzers/analyzers.clj
index d463afe..19a7d53 100644
--- a/src/cipher_analytical_machine/analyzers/analyzers.clj
+++ b/src/cipher_analytical_machine/analyzers/analyzers.clj
@@ -36,3 +36,29 @@
           0
           letter-counts)))
 
+(defn sort-map-by-count
+  "Return a sequence of [symbol count] vectors built from `count-map`, a map
+  from a symbol to its count. The vectors are ordered by count, descending;
+  entries with equal counts keep the iteration order of `count-map`."
+  [count-map]
+  (->> count-map
+       (map (fn [[sym n]] [sym n]))
+       (sort-by second >)))
+
+(defn table-to-string
+  "Return a string made by concatenating the first element of every pair in
+  `table`, in the order the pairs appear."
+  [table]
+  (apply str (map first table)))
+
+(defn symbol-frequency-table
+  "Return a map that pairs the symbols of `cipher-table` with the symbols of
+  `frequency-table` by rank: both maps are sorted by count, descending, and
+  symbols at the same position are matched. Symbols pass through
+  `table-to-string`, so the resulting keys and values are characters. When
+  the tables differ in size, the unmatched trailing symbols are dropped."
+  [cipher-table frequency-table]
+  (let [ranked-symbols (comp table-to-string sort-map-by-count)]
+    (zipmap (ranked-symbols cipher-table)
+            (ranked-symbols frequency-table))))
+
diff --git a/test/cipher_analytical_machine/analyzers/analyzers_test.clj b/test/cipher_analytical_machine/analyzers/analyzers_test.clj
new file mode 100644
index 0000000..1271dc1
--- /dev/null
+++ b/test/cipher_analytical_machine/analyzers/analyzers_test.clj
@@ -0,0 +1,47 @@
+(ns cipher-analytical-machine.analyzers.analyzers-test
+  (:require
+   [clojure.test :refer :all]
+   [cipher-analytical-machine.analyzers.analyzers :refer :all]))
+
+(deftest count-characters-test
+  (testing "Count two dublicates and one uniq character from a string."
+    (is (= {\a 2 \b 1}
+           (count-characters "aba")))))
+
+(deftest chi-squared-letter-statistic-test
+  (testing "If the frequence of A is 0.1, the text length is 100 and contains 20 times, then the chi-squared is 10."
+    (is (= 10.0
+           (chi-squared-letter-statistic 20 0.1 100))))
+
+  (testing "If the frequence of A is 0.1, the text length is 10 and contains 5 times, then the chi-squared is 16.0."
+    (is (= 16.0
+           (chi-squared-letter-statistic 5 0.1 10)))))
+
+(deftest chi-squared-statistic-test
+  (let [text "aaaaabbbbb"]
+    (testing "If the frequencies of a and b is 0.1, the text length is 10 and contains them 5 times, then score is 32."
+      (is (= 32.0
+             (chi-squared-statistic text {\a 0.1 \b 0.1}))))))
+
+(deftest sort-map-by-count-test
+  (testing "Test the sort in descending order."
+    (is (= [[\b 3] [\a 2] [\c 1]]
+           (sort-map-by-count {\a 2 \b 3 \c 1}))))
+
+  (testing "An empty map gives an empty sequence."
+    (is (empty? (sort-map-by-count {})))))
+
+(deftest table-to-string-test
+  (testing "The first elements of the pairs are joined in table order."
+    (is (= "bac"
+           (table-to-string [[\b 3] [\a 2] [\c 1]])))))
+
+(deftest symbol-frequency-table-test
+  (testing "Symbols of equal rank are paired: cipher symbol -> frequency symbol."
+    (is (= {\d \b, \e \a, \f \c}
+           (symbol-frequency-table {\d 3 \e 2 \f 1} {\b 3 \a 2 \c 1}))))
+
+  (testing "Extra symbols of the larger table are dropped."
+    (is (= {\d \b}
+           (symbol-frequency-table {\d 3} {\b 3 \a 2 \c 1})))))
+