comp3141/tortoise/Tortoise.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

module Tortoise where

-- COMP3141 22T2 ASSIGNMENT 1

import Data.Semigroup
import Data.Function
import Data.List
import Test.QuickCheck

-- data type definitions

data Freq = Freq Int deriving (Eq, Ord)
data Interval = Interval Int deriving (Eq, Ord)

type Count = Integer
data Histogram = Histogram [(Interval, Count)] deriving (Show, Eq)

data SigCard =
  SigCard {
    refHistogram :: Histogram,
    excluded :: [Interval]
  } deriving (Show, Eq)

data Verdict = RealWeapon | Dud deriving (Show, Eq)

-- helper functions

notImpl :: String -> a
notImpl x = error $ "'" ++ x ++ "'" ++ " not defined"

startPoint :: Interval -> Freq
startPoint (Interval x) = Freq (100*x)

endPoint :: Interval -> Freq
endPoint (Interval x) = Freq (100*x + 100)

-- ASSIGNMENT STARTS HERE --

-- Problem 1

inside :: Freq -> Interval -> Bool
f `inside` i = f >= (startPoint i) && f < (endPoint i)

intervalOf :: Freq -> Interval
intervalOf (Freq f) = Interval (f `div` 100 - ((f `mod` 100) `div` 100))

instance Arbitrary Freq where
    arbitrary = do
    Positive x <- arbitrary
    return $ Freq x

instance Arbitrary Interval where
    arbitrary = do
    Positive x <- arbitrary
    return $ Interval x

instance Show Freq where
    show (Freq f) = show f
instance Show Interval where
    show i = show (startPoint i) ++ " to " ++ show (endPoint i)

prop_inIntervalOf :: Freq -> Bool
prop_inIntervalOf f = f `inside` (intervalOf f)

prop_inOneInterval :: Freq -> Interval -> Property
prop_inOneInterval f i = (intervalOf f /= i) ==> (not (f `inside` i))

-- Problem 2

histogram :: [(Interval, Count)] -> Histogram
histogram h = Histogram ( nubBy (\a b -> fst(a) == fst(b)) (filter (\p -> snd(p) > 0 && fst(p) >= (Interval 0)) (sortBy (compare `on` fst) h)))

instance Arbitrary Histogram where
    arbitrary = do
    Positive size <- arbitrary
    list <- vector size
    return $ histogram (list)

prop_histogram1 :: Histogram -> Bool -- ascending order
prop_histogram1 (Histogram h) = h == sortBy (compare `on` fst) h

prop_histogram2 :: Histogram -> Bool -- no zero or negative
prop_histogram2 (Histogram h) = all (\p -> snd(p) > 0 && fst(p) >= (Interval 0)) h

prop_histogram3 :: Histogram -> Bool -- duplicate keys do not exist
prop_histogram3 (Histogram h) = h == nubBy (\a b -> fst(a) == fst(b)) h

-- Problem 3

process :: [Freq] -> Histogram
process freqs = histogram (map (\i -> (i, fromIntegral (length (filter (\y -> intervalOf y == i) freqs)))) uniq_ints)
    where uniq_ints = nub (map (\f -> intervalOf f) freqs)

merge :: Histogram -> Histogram -> Histogram
merge (Histogram a) (Histogram b) = histogram (map (\p -> (fst(p), fromIntegral (sum $ map (\q -> snd(q)) (filter (\n -> fst(n) == fst(p)) all_hists)))) uniq_hists)
    where 
    all_hists = a ++ b
    uniq_hists = nubBy (\a b -> fst(a) == fst(b)) all_hists 

prop_mergeAssoc :: Histogram -> Histogram -> Histogram -> Bool
prop_mergeAssoc a b c = ((a <> b) <> c) == (a <> (b <> c))

prop_mergeId :: Histogram -> Bool
prop_mergeId a = (a <> mempty) == a

prop_mergeComm :: Histogram -> Histogram -> Bool
prop_mergeComm a b = (a <> b) == (b <> a)

instance Semigroup Histogram where
  (<>) = merge

instance Monoid Histogram where
  mappend = (<>)
  mempty = histogram []

-- Problem 4


is_similar :: Histogram -> Histogram -> Bool
is_similar (Histogram a) (Histogram b) = (sqrt (fromIntegral ((sum non_unique) + (sum unique)))) < 32 where
    intersecta = sort (intersectBy (\a b -> fst(a) == fst(b)) a b) 
    intersectb = sort (intersectBy (\a b -> fst(a) == fst(b)) b a)
    non_unique = map (\((_, b), (_, d)) -> (b - d)*(b - d)) (zip intersecta intersectb)
    unique = map (\(_, b) -> b*b) ( filter (\p -> not (elem p intersectb)) (filter (\p -> not (elem p intersecta)) a ++ b))

prop_test_refl :: Histogram -> Bool
prop_test_refl a = (is_similar a a) == True

report_refl :: Maybe Histogram
report_refl = Nothing

prop_test_symm :: Histogram -> Histogram -> Property
prop_test_symm a b = is_similar a b ==> is_similar b a

report_symm :: Maybe (Histogram, Histogram)
report_symm = Nothing

report_tran :: Maybe (Histogram, Histogram, Histogram)
report_tran = Just (histogram [(Interval 14, 25)], histogram [], histogram [(Interval 25, 26)])

prop_test_tran :: Histogram -> Histogram -> Histogram -> Property
prop_test_tran a b c = (is_similar a b) && (is_similar b c) ==> (is_similar a c)

-- Inspector O'Hare implemented match as follows:
match :: Histogram -> SigCard -> Verdict
match (Histogram h) (SigCard (Histogram r) v) =
  if d < 32 then RealWeapon else Dud where
    grab r (Histogram hs) = case filter (\x -> fst x == r) hs of
      [(_,x)] -> x
      _       -> 0
    squareDist (Histogram h1) (Histogram h2) = sum squares where
      common = sort . nub $ map fst h1 ++ map fst h2
      squares =
        map (\x -> (fromIntegral $ grab x (Histogram h1) - grab x (Histogram h2))**2)
            common
    d1 = squareDist (Histogram h) (Histogram r)
    h' = Histogram $ filter (\x -> fst x `elem` v) h
    r' = Histogram $ filter (\x -> fst x `elem` v) r
    d2 = squareDist h' r'
    d = sqrt (d1 - d2)

-- Use this reference card to find a false positive for `match`
refCard :: SigCard
refCard = SigCard (histogram r) v where
  r = [(Interval 4, 4000), (Interval 5, 6000), (Interval 6,300)]
  v = [Interval 5]

prop_test_fp :: (Int, Int) -> Property
prop_test_fp (i, j) = ((match unclean_hist refCard) == RealWeapon) ==> (is_similar h (Histogram clean_hist))  where
    offset = (Interval 10, 33)
    big_num = 16999990000
    unclean_hist = histogram [(Interval 4, 4000), (Interval 6, 300), (Interval 5, big_num), offset]
    (Histogram clean_hist) = histogram [(Interval 4, 4000), (Interval 6, 300), offset]
    h = histogram [(Interval 4, 4000), (Interval 6, 300)]

falsePos :: Histogram
falsePos = histogram [(Interval 4, 4000), (Interval 6, 300), (Interval 5, 16999990000), (Interval 10, 33)]