module Tortoise where

-- COMP3141 22T2 ASSIGNMENT 1

import Data.Semigroup
import Data.Function
import Data.List
import Test.QuickCheck

-- data type definitions

data Freq = Freq Int deriving (Eq, Ord)
data Interval = Interval Int deriving (Eq, Ord)

type Count = Integer
data Histogram = Histogram [(Interval, Count)] deriving (Show, Eq)

data SigCard =
  SigCard {
    refHistogram :: Histogram,
    excluded :: [Interval]
  } deriving (Show, Eq)

data Verdict = RealWeapon | Dud deriving (Show, Eq)

-- helper functions

notImpl :: String -> a
notImpl x = error $ "'" ++ x ++ "'" ++ " not defined"

startPoint :: Interval -> Freq
startPoint (Interval x) = Freq (100*x)

endPoint :: Interval -> Freq
endPoint (Interval x) = Freq (100*x + 100)

-- ASSIGNMENT STARTS HERE --

-- Problem 1

inside :: Freq -> Interval -> Bool
f `inside` i = f >= (startPoint i) && f < (endPoint i)

intervalOf :: Freq -> Interval
intervalOf (Freq f) = Interval (f `div` 100 - ((f `mod` 100) `div` 100))

instance Arbitrary Freq where
    arbitrary = do
    Positive x <- arbitrary
    return $ Freq x

instance Arbitrary Interval where
    arbitrary = do
    Positive x <- arbitrary
    return $ Interval x

instance Show Freq where
    show (Freq f) = show f
instance Show Interval where
    show i = show (startPoint i) ++ " to " ++ show (endPoint i)

prop_inIntervalOf :: Freq -> Bool
prop_inIntervalOf f = f `inside` (intervalOf f)

prop_inOneInterval :: Freq -> Interval -> Property
prop_inOneInterval f i = (intervalOf f /= i) ==> (not (f `inside` i))

-- Problem 2

histogram :: [(Interval, Count)] -> Histogram
histogram h = Histogram ( nubBy (\a b -> fst(a) == fst(b)) (filter (\p -> snd(p) > 0 && fst(p) >= (Interval 0)) (sortBy (compare `on` fst) h)))

instance Arbitrary Histogram where
    arbitrary = do
    Positive size <- arbitrary
    list <- vector size
    return $ histogram (list)

prop_histogram1 :: Histogram -> Bool -- ascending order
prop_histogram1 (Histogram h) = h == sortBy (compare `on` fst) h

prop_histogram2 :: Histogram -> Bool -- no zero or negative
prop_histogram2 (Histogram h) = all (\p -> snd(p) > 0 && fst(p) >= (Interval 0)) h

prop_histogram3 :: Histogram -> Bool -- duplicate keys do not exist
prop_histogram3 (Histogram h) = h == nubBy (\a b -> fst(a) == fst(b)) h

-- Problem 3

process :: [Freq] -> Histogram
process freqs = histogram (map (\i -> (i, fromIntegral (length (filter (\y -> intervalOf y == i) freqs)))) uniq_ints)
    where uniq_ints = nub (map (\f -> intervalOf f) freqs)

merge :: Histogram -> Histogram -> Histogram
merge (Histogram a) (Histogram b) = histogram (map (\p -> (fst(p), fromIntegral (sum $ map (\q -> snd(q)) (filter (\n -> fst(n) == fst(p)) all_hists)))) uniq_hists)
    where 
    all_hists = a ++ b
    uniq_hists = nubBy (\a b -> fst(a) == fst(b)) all_hists 

prop_mergeAssoc :: Histogram -> Histogram -> Histogram -> Bool
prop_mergeAssoc a b c = ((a <> b) <> c) == (a <> (b <> c))

prop_mergeId :: Histogram -> Bool
prop_mergeId a = (a <> mempty) == a

prop_mergeComm :: Histogram -> Histogram -> Bool
prop_mergeComm a b = (a <> b) == (b <> a)

instance Semigroup Histogram where
  (<>) = merge

instance Monoid Histogram where
  mappend = (<>)
  mempty = histogram []

-- Problem 4


is_similar :: Histogram -> Histogram -> Bool
is_similar (Histogram a) (Histogram b) = (sqrt (fromIntegral ((sum non_unique) + (sum unique)))) < 32 where
    intersecta = sort (intersectBy (\a b -> fst(a) == fst(b)) a b) 
    intersectb = sort (intersectBy (\a b -> fst(a) == fst(b)) b a)
    non_unique = map (\((_, b), (_, d)) -> (b - d)*(b - d)) (zip intersecta intersectb)
    unique = map (\(_, b) -> b*b) ( filter (\p -> not (elem p intersectb)) (filter (\p -> not (elem p intersecta)) a ++ b))

prop_test_refl :: Histogram -> Bool
prop_test_refl a = (is_similar a a) == True

report_refl :: Maybe Histogram
report_refl = Nothing

prop_test_symm :: Histogram -> Histogram -> Property
prop_test_symm a b = is_similar a b ==> is_similar b a

report_symm :: Maybe (Histogram, Histogram)
report_symm = Nothing

report_tran :: Maybe (Histogram, Histogram, Histogram)
report_tran = Just (histogram [(Interval 14, 25)], histogram [], histogram [(Interval 25, 26)])

prop_test_tran :: Histogram -> Histogram -> Histogram -> Property
prop_test_tran a b c = (is_similar a b) && (is_similar b c) ==> (is_similar a c)

-- Inspector O'Hare implemented match as follows:
match :: Histogram -> SigCard -> Verdict
match (Histogram h) (SigCard (Histogram r) v) =
  if d < 32 then RealWeapon else Dud where
    grab r (Histogram hs) = case filter (\x -> fst x == r) hs of
      [(_,x)] -> x
      _       -> 0
    squareDist (Histogram h1) (Histogram h2) = sum squares where
      common = sort . nub $ map fst h1 ++ map fst h2
      squares =
        map (\x -> (fromIntegral $ grab x (Histogram h1) - grab x (Histogram h2))**2)
            common
    d1 = squareDist (Histogram h) (Histogram r)
    h' = Histogram $ filter (\x -> fst x `elem` v) h
    r' = Histogram $ filter (\x -> fst x `elem` v) r
    d2 = squareDist h' r'
    d = sqrt (d1 - d2)

-- Use this reference card to find a false positive for `match`
refCard :: SigCard
refCard = SigCard (histogram r) v where
  r = [(Interval 4, 4000), (Interval 5, 6000), (Interval 6,300)]
  v = [Interval 5]

prop_test_fp :: (Int, Int) -> Property
prop_test_fp (i, j) = ((match unclean_hist refCard) == RealWeapon) ==> (is_similar h (Histogram clean_hist))  where
    offset = (Interval 10, 33)
    big_num = 16999990000
    unclean_hist = histogram [(Interval 4, 4000), (Interval 6, 300), (Interval 5, big_num), offset]
    (Histogram clean_hist) = histogram [(Interval 4, 4000), (Interval 6, 300), offset]
    h = histogram [(Interval 4, 4000), (Interval 6, 300)]

falsePos :: Histogram
falsePos = histogram [(Interval 4, 4000), (Interval 6, 300), (Interval 5, 16999990000), (Interval 10, 33)]