Add simple language analyzers.
continuous-integration/drone/push Build is failing Details

main 0.3.4
KKlochko 2 years ago
parent 617dd34a23
commit 174abd8072

@ -4,7 +4,9 @@
:license {:name "LGPL"
:url "http://www.gnu.org/licenses/lgpl-3.0.txt"}
:dependencies [[org.clojure/clojure "1.11.1"]
[org.clojure/tools.cli "1.0.219"]]
[org.clojure/tools.cli "1.0.219"]
[org.apache.tika/tika-core "1.28.5"]
[org.apache.tika/tika-langdetect "1.28.5"]]
:main ^:skip-aot cipher-analytical-machine.core
:target-path "target/%s"
:profiles {:uberjar {:aot :all

@ -0,0 +1,21 @@
(ns cipher-analytical-machine.language_analyzer
(:import [org.apache.tika.language LanguageIdentifier])
(:gen-class))
(defn detect-language
  "Return the language string that Tika's LanguageIdentifier reports for `text`."
  [text]
  (let [identifier (LanguageIdentifier. text)]
    (.getLanguage identifier)))
(defn is-language?
  "Return true when the language detected for `text` equals `language-id`."
  [text language-id]
  (let [detected (detect-language text)]
    (= detected language-id)))
(defn is-nonsense?
  "Return true when `text` is NOT detected as being written in `language-id`.
  This is the logical negation of `is-language?`."
  [text language-id]
  (-> text
      (is-language? language-id)
      not))

@ -0,0 +1,45 @@
(ns cipher-analytical-machine.language_analyzer_test
(:require
[clojure.test :refer :all]
[cipher-analytical-machine.language_analyzer :refer :all]
))
;; Table-driven check of `detect-language`: each row is
;; [testing-context expected-language sample-text].
(deftest detect-language-test
  (doseq [[context expected sample]
          [["Checking the English text"
            "en" "This is a sentence that uses English."]
           ["Checking the Ukrainian text"
            "uk" "Текст, що написаний українською."]
           ["Checking a gibberish that uses English letters"
            "lt" "dfgjldfjgdfl gjdfg idfjig hdgesr khs e."]
           ["Checking a gibberish that uses Ukrainian letters"
            "uk" "іаврпшшцді врадів аргів аріл варї йцґ."]]]
    (testing context
      (is (= expected (detect-language sample))))))
;; Table-driven check of `is-language?`: each row is
;; [testing-context sample-text language-id], and every row must match.
(deftest is-language?-test
  (doseq [[context sample language-id]
          [["Checking the English text"
            "This is a sentence that uses English." "en"]
           ["Checking the Ukrainian text"
            "Текст, що написаний українською." "uk"]
           ["Checking a gibberish that uses English letters"
            "dfgjldfjgdfl gjdfg idfjig hdgesr khs e." "lt"]
           ["Checking a gibberish that uses Ukrainian letters"
            "іаврпшшцді врадів аргів аріл варї йцґ." "uk"]]]
    (testing context
      (is (is-language? sample language-id)))))
;; Checks `is-nonsense?`, which is true exactly when the detected language
;; differs from the expected `language-id`.
(deftest is-nonsense?-test
  (testing "Checking the English text"
    (is (not (is-nonsense? "This is a sentence that uses English." "en"))))
  (testing "Checking the Ukrainian text"
    (is (not (is-nonsense? "Текст, що написаний українською." "uk"))))
  (testing "Checking a gibberish that uses English letters"
    (is (is-nonsense? "dfgjldfjgdfl gjdfg idfjig hdgesr khs e." "en")))
  (testing "Checking a gibberish that uses Ukrainian letters"
    ;; detect-language-test and is-language?-test both pin this sample as
    ;; detected "uk", so it cannot be reported as nonsense relative to "uk";
    ;; the previous assertion contradicted those tests. The detector cannot
    ;; tell Cyrillic gibberish from Ukrainian, so we assert the real behaviour
    ;; here and verify the mismatch against a different language instead.
    (is (not (is-nonsense? "іаврпшшцді врадів аргів аріл варї йцґ." "uk")))
    (is (is-nonsense? "іаврпшшцді врадів аргів аріл варї йцґ." "en"))))
Loading…
Cancel
Save