Add a simple language analyzer.
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
parent
617dd34a23
commit
174abd8072
@ -0,0 +1,21 @@
|
||||
(ns cipher-analytical-machine.language_analyzer
|
||||
(:import [org.apache.tika.language LanguageIdentifier])
|
||||
(:gen-class))
|
||||
|
||||
(defn detect-language
  "Build a Tika LanguageIdentifier for `text` and return the language it
  reports via `getLanguage` (the tests expect codes such as \"en\" or \"uk\")."
  [text]
  (let [identifier (LanguageIdentifier. text)]
    (.getLanguage identifier)))
|
||||
|
||||
(defn is-language?
  "Return true when the language detected for `text` equals `language-id`,
  false otherwise."
  [text language-id]
  (let [detected (detect-language text)]
    (= detected language-id)))
|
||||
|
||||
(defn is-nonsense?
  "Return true when `text` is NOT detected as being written in `language-id`.
  This is the logical negation of `is-language?`."
  [text language-id]
  (-> (is-language? text language-id)
      not))
|
||||
|
@ -0,0 +1,45 @@
|
||||
(ns cipher-analytical-machine.language_analyzer_test
|
||||
(:require
|
||||
[clojure.test :refer :all]
|
||||
[cipher-analytical-machine.language_analyzer :refer :all]
|
||||
))
|
||||
|
||||
(deftest detect-language-test
  ;; Each row: testing description, expected language code, input text.
  (doseq [[description expected text]
          [["Checking the English text"
            "en" "This is a sentence that uses English."]
           ["Checking the Ukrainian text"
            "uk" "Текст, що написаний українською."]
           ["Checking a gibberish that uses English letters"
            "lt" "dfgjldfjgdfl gjdfg idfjig hdgesr khs e."]
           ["Checking a gibberish that uses Ukrainian letters"
            "uk" "іаврпшшцді врадів аргів аріл варї йцґ."]]]
    (testing description
      (is (= expected (detect-language text))))))
|
||||
|
||||
(deftest is-language?-test
  ;; Each row: testing description, input text, language id that the
  ;; detector is expected to report for that text.
  (doseq [[description text language-id]
          [["Checking the English text"
            "This is a sentence that uses English." "en"]
           ["Checking the Ukrainian text"
            "Текст, що написаний українською." "uk"]
           ["Checking a gibberish that uses English letters"
            "dfgjldfjgdfl gjdfg idfjig hdgesr khs e." "lt"]
           ["Checking a gibberish that uses Ukrainian letters"
            "іаврпшшцді врадів аргів аріл варї йцґ." "uk"]]]
    (testing description
      (is (is-language? text language-id)))))
|
||||
|
||||
(deftest is-nonsense?-test
  ;; `is-nonsense?` is the negation of `is-language?`, so every expectation
  ;; here must be the exact opposite of what `is-language?` yields for the
  ;; same text and language id.
  (testing "Checking the English text"
    (is (not (is-nonsense? "This is a sentence that uses English." "en"))))

  (testing "Checking the Ukrainian text"
    (is (not (is-nonsense? "Текст, що написаний українською." "uk"))))

  (testing "Checking a gibberish that uses English letters"
    ;; The detector is expected to report "lt" for this text (see
    ;; detect-language-test), so it is not "en" and counts as nonsense.
    (is (is-nonsense? "dfgjldfjgdfl gjdfg idfjig hdgesr khs e." "en")))

  (testing "Checking a gibberish that uses Ukrainian letters"
    ;; Known limitation: the detector is expected to report "uk" for this
    ;; Cyrillic gibberish (see detect-language-test and is-language?-test),
    ;; so `is-nonsense?` cannot flag it. Asserting the opposite would
    ;; contradict those tests and could never pass together with them.
    (is (not (is-nonsense? "іаврпшшцді врадів аргів аріл варї йцґ." "uk")))))
|
||||
|
Loading…
Reference in new issue