Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

It's my first clojure script (simple word translator based on wikipedia), and I guess there are things which could be simplified / done more idiomatic way. Specifically, I wonder if get-translations couldn't be done better (it produces map lang code => term form regexp sequence)

(ns wiki-translate
   (:require [clojure.contrib.http.agent :as h])
   (:require [clojure.contrib.string :as s])
   (:import (java.net URLDecoder URLEncoder))
)

(defn url-decode
  ([text] (URLDecoder/decode text "UTF-8"))
)

(defn url-encode
  ([text] (URLEncoder/encode text "UTF-8"))
)

(defn get-url
  ([lg term] (str "http://" lg ".wikipedia.org/wiki/" (url-encode term)))
)

(defn fetch-url
  ([url] (h/string (h/http-agent url)))
)

(defn get-translations
  ([cnt]  (apply sorted-map (flatten (for  [s (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt)] [(s 1) (s 2)]))))
)

(defn translate
   ([term src-lg tgt-lg] (
    let [translations (get-translations (fetch-url (get-url src-lg term)))]
      (if (contains?  translations tgt-lg) (s/replace-str  "_" " " (url-decode (get translations tgt-lg))) "<NOT FOUND>")))
)

(defn prompt-read 
  ([] (prompt-read ""))
  ([prompt] (print (format "%s: " prompt)) (flush ) (read-line))
)

(defn prompt-translate
  ([] (let [src-lg (prompt-read "Source language (en, fr, de ...)") tgt-lg (prompt-read "Target language  (en, fr, de ...)") term (prompt-read "Term to translate")]
  (println (str "\"" term "\" translated from " src-lg " to " tgt-lg " : " (translate term src-lg tgt-lg)))))
)

(while true (prompt-translate))
share|improve this question

1 Answer 1

up vote 4 down vote accepted

Don't use the multiple-arity syntax for defn unless you actually need to:

(defn url-decode
  [text]
  (URLDecoder/decode text "UTF-8"))

Your code could use some line breaks here and there. This is how clojure is usually indented:

(defn translate
  [term src-lg tgt-lg]
    (let [translations (get-translations (fetch-url (get-url src-lg term)))]
      (if (contains?  translations tgt-lg)
        (s/replace-str  "_" " " (url-decode (get translations tgt-lg)))
      "<NOT FOUND>")))

and:

(defn prompt-translate
  []
  (let [src-lg (prompt-read "Source language (en, fr, de ...)")
        tgt-lg (prompt-read "Target language  (en, fr, de ...)")
        term (prompt-read "Term to translate")]
    (println (str "\"" term "\" translated from " src-lg " to " tgt-lg " : " (translate term src-lg tgt-lg)))))

Instead picking out the regexp groups by index, you should use destructuring:

(defn get-translations
  [cnt]
  (apply sorted-map
    (flatten
      (for  [[_ a b] (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt)]
        [a b]))))

... or better yet, I think, would be to do something like this:

(defn get-translations
  [cnt]
  (apply sorted-map
    (mapcat
      rest
      (re-seq #"interwiki-([^\"]+).*wiki\/([^\"]+)\".*/a>" cnt))))
share|improve this answer

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Not the answer you're looking for? Browse other questions tagged or ask your own question.