You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
22 lines
518 B
22 lines
518 B
(ns cipher-analytical-machine.language-analyzer
|
|
(:import [org.apache.tika.language LanguageIdentifier])
|
|
(:gen-class))
|
|
|
|
(defn detect-language
  "Identify the language of `text`.

  Constructs an Apache Tika LanguageIdentifier from `text` and returns
  whatever its getLanguage method reports (presumably a language code
  string -- confirm against the Tika version in use)."
  [text]
  (.getLanguage (LanguageIdentifier. text)))
|
|
|
|
(defn is-language?
  "Check whether `text` is written in the language `language-id`.

  Returns true when the value produced by detect-language for `text`
  is equal to `language-id`, false otherwise."
  [text language-id]
  (let [detected (detect-language text)]
    (= detected language-id)))
|
|
|
|
(defn is-nonsense?
  "Check whether `text` is NOT written in the language `language-id`.

  Returns the logical negation of (is-language? text language-id)."
  [text language-id]
  (-> (is-language? text language-id)
      not))
|
|
|