{- data size display and parsing
 -
 - Copyright 2011 Joey Hess <id@joeyh.name>
 -
 - License: BSD-2-clause
 -
 -
 - And now a rant: 
 -
 - In the beginning, we had powers of two, and they were good.
 -
 - Disk drive manufacturers noticed that some powers of two were
 - sorta close to some powers of ten, and that rounding down to the nearest
 - power of ten allowed them to advertise their drives were bigger. This
 - was sorta annoying.
 -
 - Then drives got big. Really, really big. This was good.
 -
 - Except that the small rounding error perpretrated by the drive
 - manufacturers suffered the fate of a small error, and became a large
 - error. This was bad.
 -
 - So, a committee was formed. And it arrived at a committee-like decision,
 - which satisfied noone, confused everyone, and made the world an uglier
 - place. As with all committees, this was meh.
 -
 - And the drive manufacturers happily continued selling drives that are
 - increasingly smaller than you'd expect, if you don't count on your
 - fingers. But that are increasingly too big for anyone to much notice.
 - This caused me to need git-annex.
 -
 - Thus, I use units here that I loathe. Because if I didn't, people would
 - be confused that their drives seem the wrong size, and other people would
 - complain at me for not being standards compliant. And we call this
 - progress?
 -}

module Utility.DataUnits (
	dataUnits,
	storageUnits,
	memoryUnits,
	bandwidthUnits,
	oldSchoolUnits,
	Unit(..),
	ByteSize,

	roughSize,
	roughSize',
	compareSizes,
	readSize
) where

import Data.List
import Data.Char

import Utility.HumanNumber

type ByteSize = Integer
type Name = String
type Abbrev = String
data Unit = Unit ByteSize Abbrev Name
	deriving (Eq Unit
Unit -> Unit -> Bool
Unit -> Unit -> Ordering
Unit -> Unit -> Unit
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Unit -> Unit -> Unit
$cmin :: Unit -> Unit -> Unit
max :: Unit -> Unit -> Unit
$cmax :: Unit -> Unit -> Unit
>= :: Unit -> Unit -> Bool
$c>= :: Unit -> Unit -> Bool
> :: Unit -> Unit -> Bool
$c> :: Unit -> Unit -> Bool
<= :: Unit -> Unit -> Bool
$c<= :: Unit -> Unit -> Bool
< :: Unit -> Unit -> Bool
$c< :: Unit -> Unit -> Bool
compare :: Unit -> Unit -> Ordering
$ccompare :: Unit -> Unit -> Ordering
Ord, Int -> Unit -> ShowS
[Unit] -> ShowS
Unit -> Abbrev
forall a.
(Int -> a -> ShowS) -> (a -> Abbrev) -> ([a] -> ShowS) -> Show a
showList :: [Unit] -> ShowS
$cshowList :: [Unit] -> ShowS
show :: Unit -> Abbrev
$cshow :: Unit -> Abbrev
showsPrec :: Int -> Unit -> ShowS
$cshowsPrec :: Int -> Unit -> ShowS
Show, Unit -> Unit -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Unit -> Unit -> Bool
$c/= :: Unit -> Unit -> Bool
== :: Unit -> Unit -> Bool
$c== :: Unit -> Unit -> Bool
Eq)

dataUnits :: [Unit]
dataUnits :: [Unit]
dataUnits = [Unit]
storageUnits forall a. [a] -> [a] -> [a]
++ [Unit]
memoryUnits

{- Storage units are (stupidly) powers of ten. -}
storageUnits :: [Unit]
storageUnits :: [Unit]
storageUnits =
	[ ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
8) Abbrev
"YB" Abbrev
"yottabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
7) Abbrev
"ZB" Abbrev
"zettabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
6) Abbrev
"EB" Abbrev
"exabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
5) Abbrev
"PB" Abbrev
"petabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
4) Abbrev
"TB" Abbrev
"terabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
3) Abbrev
"GB" Abbrev
"gigabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
2) Abbrev
"MB" Abbrev
"megabyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
1) Abbrev
"kB" Abbrev
"kilobyte" -- weird capitalization thanks to committe
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
0) Abbrev
"B" Abbrev
"byte"
	]
  where
	p :: Integer -> Integer
	p :: ByteSize -> ByteSize
p ByteSize
n = ByteSize
1000forall a b. (Num a, Integral b) => a -> b -> a
^ByteSize
n

{- Memory units are (stupidly named) powers of 2. -}
memoryUnits :: [Unit]
memoryUnits :: [Unit]
memoryUnits =
	[ ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
8) Abbrev
"YiB" Abbrev
"yobibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
7) Abbrev
"ZiB" Abbrev
"zebibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
6) Abbrev
"EiB" Abbrev
"exbibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
5) Abbrev
"PiB" Abbrev
"pebibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
4) Abbrev
"TiB" Abbrev
"tebibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
3) Abbrev
"GiB" Abbrev
"gibibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
2) Abbrev
"MiB" Abbrev
"mebibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
1) Abbrev
"KiB" Abbrev
"kibibyte"
	, ByteSize -> Abbrev -> Abbrev -> Unit
Unit (ByteSize -> ByteSize
p ByteSize
0) Abbrev
"B" Abbrev
"byte"
	]
  where
	p :: Integer -> Integer
	p :: ByteSize -> ByteSize
p ByteSize
n = ByteSize
2forall a b. (Num a, Integral b) => a -> b -> a
^(ByteSize
nforall a. Num a => a -> a -> a
*ByteSize
10)

{- Bandwidth units are only measured in bits if you're some crazy telco. -}
bandwidthUnits :: [Unit]
bandwidthUnits :: [Unit]
bandwidthUnits = forall a. HasCallStack => Abbrev -> a
error Abbrev
"stop trying to rip people off"

{- Do you yearn for the days when men were men and megabytes were megabytes? -}
oldSchoolUnits :: [Unit]
oldSchoolUnits :: [Unit]
oldSchoolUnits = forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (forall a b c. ((a, b) -> c) -> a -> b -> c
curry (Unit, Unit) -> Unit
mingle) [Unit]
storageUnits [Unit]
memoryUnits
  where
	mingle :: (Unit, Unit) -> Unit
mingle (Unit ByteSize
_ Abbrev
a Abbrev
n, Unit ByteSize
s' Abbrev
_ Abbrev
_) = ByteSize -> Abbrev -> Abbrev -> Unit
Unit ByteSize
s' Abbrev
a Abbrev
n

{- approximate display of a particular number of bytes -}
roughSize :: [Unit] -> Bool -> ByteSize -> String
roughSize :: [Unit] -> Bool -> ByteSize -> Abbrev
roughSize [Unit]
units Bool
short ByteSize
i = [Unit] -> Bool -> Int -> ByteSize -> Abbrev
roughSize' [Unit]
units Bool
short Int
2 ByteSize
i

roughSize' :: [Unit] -> Bool -> Int -> ByteSize -> String
roughSize' :: [Unit] -> Bool -> Int -> ByteSize -> Abbrev
roughSize' [Unit]
units Bool
short Int
precision ByteSize
i
	| ByteSize
i forall a. Ord a => a -> a -> Bool
< ByteSize
0 = Char
'-' forall a. a -> [a] -> [a]
: [Unit] -> ByteSize -> Abbrev
findUnit [Unit]
units' (forall a. Num a => a -> a
negate ByteSize
i)
	| Bool
otherwise = [Unit] -> ByteSize -> Abbrev
findUnit [Unit]
units' ByteSize
i
  where
	units' :: [Unit]
units' = forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Ord a => a -> a -> Ordering
compare) [Unit]
units -- largest first

	findUnit :: [Unit] -> ByteSize -> Abbrev
findUnit (u :: Unit
u@(Unit ByteSize
s Abbrev
_ Abbrev
_):[Unit]
us) ByteSize
i'
		| ByteSize
i' forall a. Ord a => a -> a -> Bool
>= ByteSize
s = ByteSize -> Unit -> Abbrev
showUnit ByteSize
i' Unit
u
		| Bool
otherwise = [Unit] -> ByteSize -> Abbrev
findUnit [Unit]
us ByteSize
i'
	findUnit [] ByteSize
i' = ByteSize -> Unit -> Abbrev
showUnit ByteSize
i' (forall a. [a] -> a
last [Unit]
units') -- bytes

	showUnit :: ByteSize -> Unit -> Abbrev
showUnit ByteSize
x (Unit ByteSize
size Abbrev
abbrev Abbrev
name) = Abbrev
s forall a. [a] -> [a] -> [a]
++ Abbrev
" " forall a. [a] -> [a] -> [a]
++ Abbrev
unit
	  where
		v :: Double
v = (forall a. Num a => ByteSize -> a
fromInteger ByteSize
x :: Double) forall a. Fractional a => a -> a -> a
/ forall a. Num a => ByteSize -> a
fromInteger ByteSize
size
		s :: Abbrev
s = forall a. RealFrac a => Int -> a -> Abbrev
showImprecise Int
precision Double
v
		unit :: Abbrev
unit
			| Bool
short = Abbrev
abbrev
			| Abbrev
s forall a. Eq a => a -> a -> Bool
== Abbrev
"1" = Abbrev
name
			| Bool
otherwise = Abbrev
name forall a. [a] -> [a] -> [a]
++ Abbrev
"s"

{- displays comparison of two sizes -}
compareSizes :: [Unit] -> Bool -> ByteSize -> ByteSize -> String
compareSizes :: [Unit] -> Bool -> ByteSize -> ByteSize -> Abbrev
compareSizes [Unit]
units Bool
abbrev ByteSize
old ByteSize
new
	| ByteSize
old forall a. Ord a => a -> a -> Bool
> ByteSize
new = [Unit] -> Bool -> ByteSize -> Abbrev
roughSize [Unit]
units Bool
abbrev (ByteSize
old forall a. Num a => a -> a -> a
- ByteSize
new) forall a. [a] -> [a] -> [a]
++ Abbrev
" smaller"
	| ByteSize
old forall a. Ord a => a -> a -> Bool
< ByteSize
new = [Unit] -> Bool -> ByteSize -> Abbrev
roughSize [Unit]
units Bool
abbrev (ByteSize
new forall a. Num a => a -> a -> a
- ByteSize
old) forall a. [a] -> [a] -> [a]
++ Abbrev
" larger"
	| Bool
otherwise = Abbrev
"same"

{- Parses strings like "10 kilobytes" or "0.5tb". -}
readSize :: [Unit] -> String -> Maybe ByteSize
readSize :: [Unit] -> Abbrev -> Maybe ByteSize
readSize [Unit]
units Abbrev
input
	| forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(Double, Abbrev)]
parsednum Bool -> Bool -> Bool
|| forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteSize]
parsedunit = forall a. Maybe a
Nothing
	| Bool
otherwise = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall a b. (RealFrac a, Integral b) => a -> b
round forall a b. (a -> b) -> a -> b
$ Double
number forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral ByteSize
multiplier
  where
	(Double
number, Abbrev
rest) = forall a. [a] -> a
head [(Double, Abbrev)]
parsednum
	multiplier :: ByteSize
multiplier = forall a. [a] -> a
head [ByteSize]
parsedunit
	unitname :: Abbrev
unitname = forall a. (a -> Bool) -> [a] -> [a]
takeWhile Char -> Bool
isAlpha forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace Abbrev
rest

	parsednum :: [(Double, Abbrev)]
parsednum = forall a. Read a => ReadS a
reads Abbrev
input :: [(Double, String)]
	parsedunit :: [ByteSize]
parsedunit = [Unit] -> Abbrev -> [ByteSize]
lookupUnit [Unit]
units Abbrev
unitname

	lookupUnit :: [Unit] -> Abbrev -> [ByteSize]
lookupUnit [Unit]
_ [] = [ByteSize
1] -- no unit given, assume bytes
	lookupUnit [] Abbrev
_ = []
	lookupUnit (Unit ByteSize
s Abbrev
a Abbrev
n:[Unit]
us) Abbrev
v
		| Abbrev
a Abbrev -> Abbrev -> Bool
~~ Abbrev
v Bool -> Bool -> Bool
|| Abbrev
n Abbrev -> Abbrev -> Bool
~~ Abbrev
v = [ByteSize
s]
		| ShowS
plural Abbrev
n Abbrev -> Abbrev -> Bool
~~ Abbrev
v Bool -> Bool -> Bool
|| Abbrev
a Abbrev -> Abbrev -> Bool
~~ ShowS
byteabbrev Abbrev
v = [ByteSize
s]
		| Bool
otherwise = [Unit] -> Abbrev -> [ByteSize]
lookupUnit [Unit]
us Abbrev
v
		
	Abbrev
a ~~ :: Abbrev -> Abbrev -> Bool
~~ Abbrev
b = forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower Abbrev
a forall a. Eq a => a -> a -> Bool
== forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower Abbrev
b
		
	plural :: ShowS
plural Abbrev
n = Abbrev
n forall a. [a] -> [a] -> [a]
++ Abbrev
"s"
	byteabbrev :: ShowS
byteabbrev Abbrev
a = Abbrev
a forall a. [a] -> [a] -> [a]
++ Abbrev
"b"