The set is a data structures. Most computer languages have this data structure. It looks like an array (or a vector, in Clojure terms), but every element is unique. Elements are stored in no particular order.

We can create set data structures by using the set function. We can also create it with var binding.

The syntax is: (set coll)

user=> (set '[111 1 2035 1 57 111])
#{1 111 2035 57}

user=> (def my-set #{"Jan", "Feb" "Mar" "Apr" "May" "Jun"})
#'user/my-set
user=> my-set
#{"Jun" "Apr" "Feb" "May" "Jan" "Mar"}

Let’s try more examples. We will use sentences from “Pride and Prejudice,” Chapter 59. Copy and paste this into your repl.

user=> ; takes out words from sentences using regular expression

user=> (def words (re-seq #"\w+" sentences))
#'user/words
user=> words
("The" "evening" "passed" "quietly" "unmarked" "by" "anything" "extraordinary" "The" "acknowledged" "lovers" "talked" "and" "laughed" "the" "unacknowledged" "were" "silent" "Darcy" "was" "not" "of" "a" "disposition" "in" "which" "happiness" "overflows" "in" "mirth" "and" "Elizabeth" "agitated" "and" "confused" "rather" "knew" "that" "she" "was" "happy" "than" "felt" "herself" "to" "be" "so" "for" "besides" "the" "immediate" "embarrassment" "there" "were" "other" "evils" "before" "her" "She" "anticipated" "what" "would" "be" "felt" "in" "the" "family" "when" "her" "situation" "became" "known" "she" "was" "aware" "that" "no" "one" "liked" "him" "but" "Jane" "and" "even" "feared" "that" "with" "the" "others" "it" "was" "a" "dislike" "which" "not" "all" "his" "fortune" "and" "consequence" "might" "do" "away")

user=> ; how many words are there?
user=> (count words)
103

user=> ; to make it understandable, sort it
user=> (sort words)
("Darcy" "Elizabeth" "Jane" "She" "The" "The" "a" "a" "acknowledged" "agitated" "all" "and" "and" "and" "and" "and" "anticipated" "anything" "aware" "away" "be" "be" "became" "before" "besides" "but" "by" "confused" "consequence" "dislike" "disposition" "do" "embarrassment" "even" "evening" "evils" "extraordinary" "family" "feared" "felt" "felt" "for" "fortune" "happiness" "happy" "her" "her" "herself" "him" "his" "immediate" "in" "in" "in" "it" "knew" "known" "laughed" "liked" "lovers" "might" "mirth" "no" "not" "not" "of" "one" "other" "others" "overflows" "passed" "quietly" "rather" "she" "she" "silent" "situation" "so" "talked" "than" "that" "that" "that" "the" "the" "the" "the" "there" "to" "unacknowledged" "unmarked" "was" "was" "was" "was" "were" "were" "what" "when" "which" "which" "with" "would")

user=> ; we see many duplications. let's make those unique
user=> (def unique-words (set words))
#'user/unique-words
user=> (sort unique-words)
("Darcy" "Elizabeth" "Jane" "She" "The" "a" "acknowledged" "agitated" "all" "and" "anticipated" "anything" "aware" "away" "be" "became" "before" "besides" "but" "by" "confused" "consequence" "dislike" "disposition" "do" "embarrassment" "even" "evening" "evils" "extraordinary" "family" "feared" "felt" "for" "fortune" "happiness" "happy" "her" "herself" "him" "his" "immediate" "in" "it" "knew" "known" "laughed" "liked" "lovers" "might" "mirth" "no" "not" "of" "one" "other" "others" "overflows" "passed" "quietly" "rather" "she" "silent" "situation" "so" "talked" "than" "that" "the" "there" "to" "unacknowledged" "unmarked" "was" "were" "what" "when" "which" "with" "would")

user=> ; how many words are left?
user=> (count unique-words)
80

user=> ; let's add "Jane" and "Lydia"
user=> ; "Jane" is already there, so she should not be added twice
user=> (sort (reduce conj unique-words ["Jane" "Lydia"]))
("Darcy" "Elizabeth" "Jane" "Lydia" "She" "The" "a" "acknowledged" "agitated" "all" "and" "anticipated" "anything" "aware" "away" "be" "became" "before" "besides" "but" "by" "confused" "consequence" "dislike" "disposition" "do" "embarrassment" "even" "evening" "evils" "extraordinary" "family" "feared" "felt" "for" "fortune" "happiness" "happy" "her" "herself" "him" "his" "immediate" "in" "it" "knew" "known" "laughed" "liked" "lovers" "might" "mirth" "no" "not" "of" "one" "other" "others" "overflows" "passed" "quietly" "rather" "she" "silent" "situation" "so" "talked" "than" "that" "the" "there" "to" "unacknowledged" "unmarked" "was" "were" "what" "when" "which" "with" "would")


user=> ; what if we eliminate articles and FANBOYS (For-And-Nor-But-Or-Yet-So)
user=> (sort (disj unique-words "a" "the" "for" "and" "nor" "but" "or" "yet" "so"))
("Darcy" "Elizabeth" "Jane" "She" "The" "acknowledged" "agitated" "all" "anticipated" "anything" "aware" "away" "be" "became" "before" "besides" "by" "confused" "consequence" "dislike" "disposition" "do" "embarrassment" "even" "evening" "evils" "extraordinary" "family" "feared" "felt" "fortune" "happiness" "happy" "her" "herself" "him" "his" "immediate" "in" "it" "knew" "known" "laughed" "liked" "lovers" "might" "mirth" "no" "not" "of" "one" "other" "others" "overflows" "passed" "quietly" "rather" "she" "silent" "situation" "talked" "than" "that" "there" "to" "unacknowledged" "unmarked" "was" "were" "what" "when" "which" "with" "would")

References