Add a simple language analyzer.
continuous-integration/drone/push Build is failing
Details
continuous-integration/drone/push Build is failing
Details
parent
617dd34a23
commit
174abd8072
@ -0,0 +1,21 @@
|
|||||||
|
(ns cipher-analytical-machine.language_analyzer
|
||||||
|
(:import [org.apache.tika.language LanguageIdentifier])
|
||||||
|
(:gen-class))
|
||||||
|
|
||||||
|
(defn detect-language
  "Identify the language of `text`.

  Builds a Tika `LanguageIdentifier` from `text` and returns whatever its
  `getLanguage` method reports (the accompanying tests expect short codes
  such as \"en\" or \"uk\")."
  [text]
  (let [identifier (LanguageIdentifier. text)]
    (.getLanguage identifier)))
|
||||||
|
|
||||||
|
(defn is-language?
  "Return true when the language detected for `text` equals `language-id`.

  `language-id` is compared with `=` against the result of
  `detect-language`, so it must use the same representation
  (e.g. \"en\", \"uk\")."
  [text language-id]
  (let [detected (detect-language text)]
    (= detected language-id)))
|
||||||
|
|
||||||
|
(defn is-nonsense?
  "Return true when `text` is NOT detected as being written in `language-id`.

  This is the exact logical negation of `is-language?`: text is treated as
  nonsense for a language whenever the detected language differs from it."
  [text language-id]
  (-> text
      (is-language? language-id)
      not))
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
(ns cipher-analytical-machine.language_analyzer_test
|
||||||
|
(:require
|
||||||
|
[clojure.test :refer :all]
|
||||||
|
[cipher-analytical-machine.language_analyzer :refer :all]
|
||||||
|
))
|
||||||
|
|
||||||
|
(deftest detect-language-test
  ;; Each case is [description expected-language-code input-text].
  ;; The gibberish cases pin the code the detector is expected to report for
  ;; meaningless input ("lt" for the Latin-letter sample, "uk" for the
  ;; Cyrillic one).
  (doseq [[description expected text]
          [["Checking the English text"
            "en"
            "This is a sentence that uses English."]
           ["Checking the Ukrainian text"
            "uk"
            "Текст, що написаний українською."]
           ["Checking a gibberish that uses English letters"
            "lt"
            "dfgjldfjgdfl gjdfg idfjig hdgesr khs e."]
           ["Checking a gibberish that uses Ukrainian letters"
            "uk"
            "іаврпшшцді врадів аргів аріл варї йцґ."]]]
    (testing description
      (is (= expected (detect-language text))))))
|
||||||
|
|
||||||
|
(deftest is-language?-test
  ;; Each case is [description input-text language-id]; every pair is
  ;; expected to be recognised as a match by `is-language?`.
  (doseq [[description text language-id]
          [["Checking the English text"
            "This is a sentence that uses English."
            "en"]
           ["Checking the Ukrainian text"
            "Текст, що написаний українською."
            "uk"]
           ["Checking a gibberish that uses English letters"
            "dfgjldfjgdfl gjdfg idfjig hdgesr khs e."
            "lt"]
           ["Checking a gibberish that uses Ukrainian letters"
            "іаврпшшцді врадів аргів аріл варї йцґ."
            "uk"]]]
    (testing description
      (is (is-language? text language-id)))))
|
||||||
|
|
||||||
|
(deftest is-nonsense?-test
  ;; `is-nonsense?` is the plain negation of `is-language?`, so every
  ;; expectation here must be the inverse of what `is-language?` yields for
  ;; the same text and language id.
  (testing "Checking the English text"
    (is (not (is-nonsense? "This is a sentence that uses English." "en"))))

  (testing "Checking the Ukrainian text"
    (is (not (is-nonsense? "Текст, що написаний українською." "uk"))))

  ;; The detector is expected to report "lt" for this input (see
  ;; detect-language-test), hence it is nonsense with respect to "en".
  (testing "Checking a gibberish that uses English letters"
    (is (is-nonsense? "dfgjldfjgdfl gjdfg idfjig hdgesr khs e." "en")))

  ;; Known limitation: the sibling tests expect the detector to report "uk"
  ;; for this Cyrillic gibberish, so `is-nonsense?` cannot flag it. The
  ;; previous assertion expected `true` here, which contradicted
  ;; is-language?-test for the very same input and could never pass together
  ;; with it.
  (testing "Checking a gibberish that uses Ukrainian letters"
    (is (not (is-nonsense? "іаврпшшцді врадів аргів аріл варї йцґ." "uk")))))
|
||||||
|
|
Loading…
Reference in new issue