Sunday, May 17, 2009

data WeightCharacterTuple = WeightCharacterTuple {
        weight :: Int,
        character :: Char
}deriving (Show)

instance Eq WeightCharacterTuple where
        (WeightCharacterTuple a _) == (WeightCharacterTuple b _) = a==b

instance Ord WeightCharacterTuple where
        (WeightCharacterTuple a _) > (WeightCharacterTuple b _) = a > b
        (WeightCharacterTuple a _) >= (WeightCharacterTuple b _) = a >= b
        (WeightCharacterTuple a _) < (WeightCharacterTuple b _) = a <>
        (WeightCharacterTuple a _) <= (WeightCharacterTuple b _) = a <= b

data Tree a = Node a (Tree a) (Tree a) | Leaf a
        deriving (Show)


instance Eq a => Eq (Tree a) where
        (==) (Node node1 _ _)   (Node node2 _ _)        = node1 == node2
        (==) (Node node1 _ _)   (Leaf node2)            = node1 == node2
        (==) (Leaf node1)       (Node node2 _ _)        = node1 == node2
        (==) (Leaf node1)       (Leaf node2)            = node1 == node2



operateOnTwoTreeNodes operation left right =
        operation l r
                where
                        l=nodeFromTree left
                        r=nodeFromTree right
                        nodeFromTree tree = case tree of
                                                (Node n _ _) -> n
                                                (Leaf n) -> n

instance Ord a => Ord (Tree a) where
        left >= right =  operateOnTwoTreeNodes (>=) left right
        left < right ="  operateOnTwoTreeNodes">
repeats :: String -> [WeightCharacterTuple]
repeats string = quickSort $ zipWith WeightCharacterTuple counts uniqueLetters
        where
        counts = map (frequency string) uniqueLetters
        frequency :: String -> Char -> Int
        frequency (x:xs) c
                | c == x        = 1 + frequency xs c
                | otherwise     = frequency xs c
        frequency _ c   = 0
        uniqueLetters = unique string

unique :: String -> String -- get the unique letters in the string
unique (x:xs)   = [x] ++ unique [y | y <- xs, y /= x ]
unique []               = []




huffman :: [Tree WeightCharacterTuple] -> [Tree WeightCharacterTuple]
huffman (min1:min2:rest) = huffman newList
        where
        newList
                | length rest /= 0 =  quickSort ((merge min1 min2):rest) -- TODO Sorting of the list is required
                | otherwise = [merge min1 min2]
        merge (Node node left right) (Leaf leaf)
                | node <= leaf = Node (WeightCharacterTuple newWeight '*') (Node node left right) (Leaf leaf)
                | otherwise    = Node (WeightCharacterTuple newWeight '*') (Leaf leaf) (Node node left right)
                where
                newWeight = (weight leaf) + (weight node)
        merge (Leaf leaf) (Node node left right)
                | node <= leaf = Node (WeightCharacterTuple newWeight '*') (Node node left right) (Leaf leaf)
                | otherwise    = Node (WeightCharacterTuple newWeight '*') (Leaf leaf) (Node node left right)
                where
                newWeight = (weight leaf) + (weight node)
        merge (Leaf leaf1) (Leaf leaf2)
                | leaf1 <= leaf2 = Node (WeightCharacterTuple newWeight '*') (Leaf leaf1) (Leaf leaf2)
                | otherwise =      Node (WeightCharacterTuple newWeight '*') (Leaf leaf2) (Leaf leaf1)
                where
                newWeight = (weight leaf1) + (weight leaf2)
huffman x = x


quickSort (x:xs) = l1 ++ [x] ++ l2 -- items less than x + x + items bigger than x
        where
                l1 = quickSort [y | y <- xs, y <>
                l2 = quickSort [y | y <- xs, y >= x] -- sorted items greater than x
quickSort [] = []


main=do
        x <- getLine
        let y1=unique x
        let y2=repeats x
        putStrLn (show y1)
        putStrLn (show y2)
        let y=map Leaf y2
        let z = huffman y
        putStrLn (show z)
        return ()

Monday, May 11, 2009

Large File support in Linux

While working on the minix fs support using FUSE, I realized that if you simply call open on a file, it fails with the "FILE too large" error if the file is > 2G

To get it to work, we need to compile the code like this

gcc -D_FILE_OFFSET_BITS=64 file.c

Sunday, May 10, 2009

Huffman Tree construction

I finally got the tree construction done - 

Did not like two things though -

1. I had to write a two separate sorting functions .. Cant seem to create instance of Eq of Parametric types . 

2. The pattern matching seems a little cluttered.

data WeightedCharacter = WeightedCharacter { weight :: Int,
 character :: Char
}deriving (Show)

instance Eq WeightedCharacter where
 (WeightedCharacter a _) == (WeightedCharacter b _) = a==b

instance Ord WeightedCharacter where
 (WeightedCharacter a _) > (WeightedCharacter b _) = a > b
 (WeightedCharacter a _) >= (WeightedCharacter b _) = a >= b
 (WeightedCharacter a _) < (WeightedCharacter b _) = a < b
 (WeightedCharacter a _) <= (WeightedCharacter b _) = a <= b


type CharacterFrequencyTuple = (Int,Char)
{-
 repeats hello
  = [(1,'e'),(1,'h'),(1,'o'),(2,'l')]

-}
repeats :: String -> [CharacterFrequencyTuple]
repeats string = quickSort $ zip counts uniqueLetters
 where
 counts = map (frequency string) uniqueLetters
 frequency :: String -> Char -> Int
 frequency (x:xs) c
  | c == x = 1 + frequency xs c
  | otherwise = frequency xs c
 frequency _ c = 0
 uniqueLetters = unique string

unique :: String -> String -- get the unique letters in the string
unique (x:xs) = [x] ++ unique [y | y <- xs, y /= x ]
unique [] = []


data Tree a = Node a (Tree a) (Tree a) | Leaf a
 deriving (Show)


huffman :: [Tree CharacterFrequencyTuple] -> [Tree CharacterFrequencyTuple]
huffman (min1:min2:rest) = huffman newList
 where
 newList
  | length rest /= 0 = sortTreeList ((merge min1 min2):rest) -- TODO Sorting of the list is required
  | otherwise = [merge min1 min2]
 merge (Node node left right) (Leaf leaf)
  | node <= leaf = Node (newWeight,'*') (Node node left right) (Leaf leaf)
  | otherwise = Node (newWeight,'*') (Leaf leaf) (Node node left right)
  where
  newWeight = (fst leaf) + (fst node)
 merge (Leaf leaf) (Node node left right)
  | node <= leaf = Node (newWeight,'*') (Node node left right) (Leaf leaf)
  | otherwise = Node (newWeight,'*') (Leaf leaf) (Node node left right)
  where
  newWeight = (fst leaf) + (fst node)
 merge (Leaf leaf1) (Leaf leaf2)
  | leaf1 <= leaf2 = Node (newWeight,'*') (Leaf leaf1) (Leaf leaf2)
  | otherwise = Node (newWeight,'*') (Leaf leaf2) (Leaf leaf1)
  where
  newWeight = (fst leaf1) + (fst leaf2)
huffman x = x


quickSort (x:xs) = l1 ++ [x] ++ l2 -- items less than x + x + items bigger than x
 where
  l1 = quickSort [y | y <- xs, y < x] -- sorted items less than x
  l2 = quickSort [y | y <- xs, y >= x] -- sorted items greater than x
quickSort [] = []

sortTreeList (x:xs) = l1 ++ [x] ++ l2
 where
  l1 = sortTreeList [y | y <- xs, (lessThan y x)] 
  l2 = sortTreeList [y | y <- xs, (not (lessThan y x))] 
  lessThan (Node n1 _ _) (Node n2 _ _) = (fst n1) < (fst n2)
  lessThan (Node n1 _ _) (Leaf n2) = (fst n1) < (fst n2)
  lessThan (Leaf n1) (Node n2 _ _) = (fst n1) < (fst n2)
  lessThan (Leaf n1) (Leaf n2) = (fst n1) < (fst n2)
sortTreeList [] = [] 

main=do
 x <- getLine 
 let y1=unique x
 let y2=repeats x
 putStrLn (show y1)
 putStrLn (show y2)
 let y=map Leaf y2
 let z = huffman y
 putStrLn (show z)
 return ()



Thursday, May 7, 2009

Huffman encoding in Haskell

My attempt at Huffman encoding in Haskell ... not complete ... hardly...anyway, I now need to switch to Minix FS in fuse

type CharacterFrequencyTuple = (Int,Char)

{-
  repeats hello
  = [(1,'e'),(1,'h'),(1,'o'),(2,'l')]

-}
repeats :: String -> [CharacterFrequencyTuple]
repeats string = quickSort $ zip counts uniqueLetters
  where
  counts = map (frequency string) uniqueLetters
  frequency :: String -> Char -> Int
  frequency (x:xs) c
  | c == x = 1 + frequency xs c
  | otherwise = frequency xs c
  frequency _ c = 0
  uniqueLetters = unique string

unique :: String -> String -- get the unique letters in the string
unique (x:xs) = [x] ++ unique [y | y <- xs, y /= x ]
unique [] = []


data Tree a = Node a (Tree a) (Tree a) | Leaf a
  deriving (Show)


huffman :: [CharacterFrequencyTuple] -> Tree Char
huffman (x:xs) = Leaf 'A'

quickSort (x:xs) = l1 ++ [x] ++ l2 -- items less than x + x + items bigger than x
  where
  l1 = quickSort [y | y <- xs, y <>
  l2 = quickSort [y | y <- xs, y >= x] -- sorted items greater than x
quickSort [] = []

main=do
  x <- getLine
  let y1=unique x
  let y2=repeats x
  putStrLn (show y1)
  putStrLn (show y2)


Tuesday, May 5, 2009

Functional programming and Minix

Looking at two things right now -

Functional programming and Minix

For FP, I'm trying to do the Huffman encoding in Haskell

For Minix, I am working on getting fuse based Minix FS to work...Fuse seems interesting.