Safe Haskell | None |
---|---|
Language | Haskell2010 |
- module Ringo.Types
- makeConfig :: [Table] -> [Fact] -> Settings -> TypeDefaults -> Either [ValidationError] Config
- extractFactTable :: Config -> Fact -> Table
- extractDimensionTables :: Config -> Fact -> [Table]
- extractDependencies :: Config -> Fact -> Dependencies
- dimensionTableDefinitionSQL :: Config -> Table -> [Text]
- factTableDefinitionSQL :: Config -> Fact -> Table -> [Text]
- dimensionTablePopulationSQL :: TablePopulationMode -> Config -> Fact -> TableName -> Text
- factTablePopulationSQL :: TablePopulationMode -> Config -> Fact -> [Text]
- dimensionTableDefinitionStatements :: Config -> Table -> [Statement]
- factTableDefinitionStatements :: Config -> Fact -> Table -> [Statement]
- dimensionTablePopulationStatement :: TablePopulationMode -> Config -> Fact -> TableName -> Statement
- factTablePopulationStatements :: TablePopulationMode -> Config -> Fact -> [Statement]
Documentation
The examples in this module assume the following code has been run. The :{ and :} will only work in GHCi.
>>>
:set -XOverloadedStrings
>>>
import Ringo
>>>
import qualified Data.Map as Map
>>>
import qualified Data.Text as Text
>>>
import Data.List (nub)
>>>
import Data.Monoid ((<>))
>>>
:{
let sessionEventsTable = Table { tableName = "session_events" , tableColumns = [ Column "id" "uuid" NotNull , Column "created_at" "timestamp without time zone" NotNull , Column "member_id" "integer" Null , Column "publisher_id" "integer" NotNull , Column "user_agent" "character varying(1024)" Null , Column "browser_name" "character varying(50)" Null , Column "os" "character varying(50)" Null , Column "geo_country_name" "character varying(50)" Null , Column "geo_city_name" "character varying(50)" Null , Column "geo_continent_name" "character varying(15)" Null , Column "geo_most_specific_subdivision_name" "character varying(100)" Null , Column "geo_longitude" "numeric(9,6)" Null , Column "geo_latitude" "numeric(9,6)" Null , Column "geo_time_zone" "character varying(20)" Null , Column "geo_postal_code" "character varying(20)" Null , Column "user_agent_name" "character varying(100)" Null , Column "user_agent_type" "character varying(15)" Null , Column "user_agent_version" "character varying(100)" Null , Column "user_agent_device" "character varying(15)" Null ] , tableConstraints = [ PrimaryKey "id" ] } sessionFact = Fact { factName = "session" , factTableName = "session_events" , factTablePersistent = True , factParentNames = [] , factColumns = [ FactColumn "created_at" $ DimTime , FactColumn "publisher_id" $ NoDimId , FactColumn "browser_name" $ DimVal "user_agent" , FactColumn "os" $ DimVal "user_agent" , FactColumn "user_agent_name" $ DimVal "user_agent" , FactColumn "geo_country_name" $ DimVal "geo" , FactColumn "geo_city_name" $ DimVal "geo" , FactColumn "geo_continent_name" $ DimVal "geo" , FactColumn "session_count" $ FactCount Nothing ] } pageViewEventsTable = Table { tableName = "page_view_events" , tableColumns = [ Column "id" "uuid" NotNull , Column "created_at" "timestamp without time zone" NotNull , Column "member_id" "integer" Null , Column "publisher_id" "integer" NotNull , Column "device_tracker_id" "uuid" Null , Column "session_event_id" "uuid" Null , Column "page_type" "character varying(20)" NotNull , Column "referrer" "character varying(1024)" Null , Column "url" "character varying(1024)" Null , Column "referrer_id" "integer" Null ] , tableConstraints = [ PrimaryKey "id" , ForeignKey "session_events" [ ("session_event_id", "id") , ("publisher_id", "publisher_id") ] , ForeignKey "referrers" [ ("referrer_id", "id") , ("publisher_id", "publisher_id") ] ] } pageViewFact = Fact { factName = "page_view" , factTableName = "page_view_events" , factTablePersistent = True , factParentNames = [ "session" ] , factColumns = [ FactColumn "created_at" $ DimTime , FactColumn "publisher_id" $ NoDimId , FactColumn "page_type" $ DimVal "page_type" , FactColumn "referrer_id" $ DimId "referrers" , FactColumn "view_count" $ FactCount Nothing ] } referrersTable = Table { tableName = "referrers" , tableColumns = [ Column "id" "integer" NotNull , Column "publisher_id" "integer" NotNull , Column "name" "character varying(100)" NotNull ] , tableConstraints = [ PrimaryKey "id" , UniqueKey ["publisher_id", "name"] ] } tables = [sessionEventsTable, pageViewEventsTable, referrersTable] facts = [sessionFact, pageViewFact] typeDefaults = Map.fromList [ ("integer", "-1") , ("timestamp", "'00-00-00 00:00:00'") , ("character", "'__UNKNOWN_VAL__'") , ("uuid", "'00000000-0000-0000-0000-000000000000'::uuid") , ("boolean", "false") , ("json", "'{}'::json") , ("numeric", "-1") , ("text", "'__UNKNOWN_VAL__'") ] settings = defSettings { settingTableNameSuffixTemplate = "" } config = case makeConfig tables facts settings typeDefaults of Left errors -> error . unlines . map show $ errors Right config -> config :}
module Ringo.Types
makeConfig :: [Table] -> [Fact] -> Settings -> TypeDefaults -> Either [ValidationError] Config Source #
extractFactTable :: Config -> Fact -> Table Source #
>>>
print $ extractFactTable config sessionFact
Table fact_session_by_minute Column created_at_minute_id bigint NOT NULL Column publisher_id integer NOT NULL Column session_count integer NOT NULL Column geo_id integer NOT NULL Column user_agent_id integer NOT NULL UniqueKey (created_at_minute_id, publisher_id, geo_id, user_agent_id)>>>
print $ extractFactTable config pageViewFact
Table fact_page_view_by_minute Column created_at_minute_id bigint NOT NULL Column publisher_id integer NOT NULL Column view_count integer NOT NULL Column referrer_id integer NOT NULL Column page_type_id integer NOT NULL Column geo_id integer NOT NULL Column user_agent_id integer NOT NULL UniqueKey (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id)
extractDimensionTables :: Config -> Fact -> [Table] Source #
>>>
mapM_ print $ extractDimensionTables config sessionFact
Table dim_geo Column id serial NOT NULL Column country_name character varying(50) NOT NULL Column city_name character varying(50) NOT NULL Column continent_name character varying(15) NOT NULL PrimaryKey id UniqueKey (country_name, city_name, continent_name) Table dim_user_agent Column id serial NOT NULL Column browser_name character varying(50) NOT NULL Column os character varying(50) NOT NULL Column name character varying(100) NOT NULL PrimaryKey id UniqueKey (browser_name, os, name)>>>
mapM_ print . filter (`notElem` tables) $ extractDimensionTables config pageViewFact
Table dim_page_type Column id serial NOT NULL Column page_type character varying(20) NOT NULL PrimaryKey id UniqueKey (page_type)
extractDependencies :: Config -> Fact -> Dependencies Source #
>>>
let depsToStr = map ((\(k, vs) -> Text.unpack $ k <> ":\n - " <> Text.intercalate "\n - " vs)) . Map.toList
>>>
mapM_ putStrLn . depsToStr $ extractDependencies config sessionFact
dim_geo: - session_events dim_user_agent: - session_events fact_session_by_minute: - session_events - dim_user_agent - dim_geo>>>
mapM_ putStrLn . depsToStr $ extractDependencies config pageViewFact
dim_page_type: - page_view_events fact_page_view_by_minute: - page_view_events - session_events - dim_page_type - referrers - dim_user_agent - dim_geo
dimensionTableDefinitionSQL :: Config -> Table -> [Text] Source #
>>>
let dimTables = filter (`notElem` tables) . nub . concatMap (extractDimensionTables config) $ facts
>>>
let sqls = map (dimensionTableDefinitionSQL config) dimTables
>>>
mapM_ (\sqls -> mapM_ (putStr . Text.unpack) sqls >> putStrLn "--------" ) sqls
create table dim_geo ( id serial not null, country_name character varying(50) not null, city_name character varying(50) not null, continent_name character varying(15) not null ) ; alter table dim_geo add primary key (id); alter table dim_geo add unique (country_name, city_name, continent_name); create index on dim_geo (country_name) ; create index on dim_geo (city_name) ; create index on dim_geo (continent_name) ; -------- create table dim_user_agent ( id serial not null, browser_name character varying(50) not null, os character varying(50) not null, name character varying(100) not null ) ; alter table dim_user_agent add primary key (id); alter table dim_user_agent add unique (browser_name, os, name); create index on dim_user_agent (browser_name) ; create index on dim_user_agent (os) ; create index on dim_user_agent (name) ; -------- create table dim_page_type ( id serial not null, page_type character varying(20) not null ) ; alter table dim_page_type add primary key (id); alter table dim_page_type add unique (page_type); --------
factTableDefinitionSQL :: Config -> Fact -> Table -> [Text] Source #
>>>
let storySessionFactTable = extractFactTable config sessionFact
>>>
let sqls = factTableDefinitionSQL config sessionFact storySessionFactTable
>>>
mapM_ (putStr . Text.unpack) sqls
create table fact_session_by_minute ( created_at_minute_id bigint not null, publisher_id integer not null, session_count integer not null, geo_id integer not null, user_agent_id integer not null ) ; alter table fact_session_by_minute add unique (created_at_minute_id, publisher_id, geo_id, user_agent_id); create index on fact_session_by_minute (created_at_minute_id) ; create index on fact_session_by_minute (publisher_id) ; create index on fact_session_by_minute (geo_id) ; create index on fact_session_by_minute (user_agent_id) ;>>>
let pageViewFactTable = extractFactTable config pageViewFact
>>>
let sqls = factTableDefinitionSQL config pageViewFact pageViewFactTable
>>>
mapM_ (putStr . Text.unpack) sqls
create table fact_page_view_by_minute ( created_at_minute_id bigint not null, publisher_id integer not null, view_count integer not null, referrer_id integer not null, page_type_id integer not null, geo_id integer not null, user_agent_id integer not null ) ; alter table fact_page_view_by_minute add unique (created_at_minute_id, publisher_id, referrer_id, page_type_id, geo_id, user_agent_id); create index on fact_page_view_by_minute (created_at_minute_id) ; create index on fact_page_view_by_minute (publisher_id) ; create index on fact_page_view_by_minute (referrer_id) ; create index on fact_page_view_by_minute (page_type_id) ; create index on fact_page_view_by_minute (geo_id) ; create index on fact_page_view_by_minute (user_agent_id) ;
dimensionTablePopulationSQL :: TablePopulationMode -> Config -> Fact -> TableName -> Text Source #
>>>
let storySessionDimTableNames = map tableName $ extractDimensionTables config sessionFact
>>>
let sqls = map (dimensionTablePopulationSQL FullPopulation config sessionFact) storySessionDimTableNames
>>>
mapM_ (putStr . Text.unpack) sqls
insert into dim_geo (country_name, city_name, continent_name) select distinct coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name from session_events where (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) and created_at < ? ; insert into dim_user_agent (browser_name, os, name) select distinct coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, coalesce(session_events.os,'__UNKNOWN_VAL__') as os, coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name from session_events where (browser_name is not null or os is not null or user_agent_name is not null) and created_at < ? ;>>>
let sqls = map (dimensionTablePopulationSQL IncrementalPopulation config sessionFact) storySessionDimTableNames
>>>
mapM_ (putStr . Text.unpack) sqls
insert into dim_geo (country_name, city_name, continent_name) select x.* from (select distinct coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') as geo_country_name, coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') as geo_city_name, coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__') as geo_continent_name from session_events where (geo_country_name is not null or geo_city_name is not null or geo_continent_name is not null) and created_at < ? and created_at >= ?) as x left outer join dim_geo on dim_geo.country_name = x.geo_country_name and dim_geo.city_name = x.geo_city_name and dim_geo.continent_name = x.geo_continent_name where dim_geo.country_name is null and dim_geo.city_name is null and dim_geo.continent_name is null ; insert into dim_user_agent (browser_name, os, name) select x.* from (select distinct coalesce(session_events.browser_name,'__UNKNOWN_VAL__') as browser_name, coalesce(session_events.os,'__UNKNOWN_VAL__') as os, coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__') as user_agent_name from session_events where (browser_name is not null or os is not null or user_agent_name is not null) and created_at < ? and created_at >= ?) as x left outer join dim_user_agent on dim_user_agent.browser_name = x.browser_name and dim_user_agent.os = x.os and dim_user_agent.name = x.user_agent_name where dim_user_agent.browser_name is null and dim_user_agent.os is null and dim_user_agent.name is null ;>>>
let pageViewDimTableNames = map tableName . filter (`notElem` tables) $ extractDimensionTables config pageViewFact
>>>
let sqls = map (dimensionTablePopulationSQL FullPopulation config pageViewFact) pageViewDimTableNames
>>>
mapM_ (putStr . Text.unpack) sqls
insert into dim_page_type (page_type) select distinct page_view_events.page_type as page_type from page_view_events where (page_type is not null) and created_at < ? ;>>>
let sqls = map (dimensionTablePopulationSQL IncrementalPopulation config pageViewFact) pageViewDimTableNames
>>>
mapM_ (putStr . Text.unpack) sqls
insert into dim_page_type (page_type) select x.* from (select distinct page_view_events.page_type as page_type from page_view_events where (page_type is not null) and created_at < ? and created_at >= ?) as x left outer join dim_page_type on dim_page_type.page_type = x.page_type where dim_page_type.page_type is null ;
factTablePopulationSQL :: TablePopulationMode -> Config -> Fact -> [Text] Source #
>>>
let sqls = factTablePopulationSQL FullPopulation config sessionFact
>>>
mapM_ (putStr . Text.unpack) sqls
insert into fact_session_by_minute (created_at_minute_id, publisher_id, session_count, geo_id, user_agent_id) select cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, session_events.publisher_id as xxff_publisher_id, count(*) as xxff_session_count, coalesce((select id from dim_geo as dim_geo where dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') and dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') and dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, coalesce((select id from dim_user_agent as dim_user_agent where dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') and dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') and dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id from session_events where session_events.created_at < ? group by xxff_created_at_minute_id, xxff_publisher_id, xxff_geo_id, xxff_user_agent_id ;>>>
let sqls = factTablePopulationSQL IncrementalPopulation config sessionFact
>>>
mapM_ (putStr . Text.unpack) sqls
insert into fact_session_by_minute (created_at_minute_id, publisher_id, session_count, geo_id, user_agent_id) select cast(floor(extract(epoch from session_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, session_events.publisher_id as xxff_publisher_id, count(*) as xxff_session_count, coalesce((select id from dim_geo as dim_geo where dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') and dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') and dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, coalesce((select id from dim_user_agent as dim_user_agent where dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') and dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') and dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id from session_events where session_events.created_at < ? and session_events.created_at >= ? group by xxff_created_at_minute_id, xxff_publisher_id, xxff_geo_id, xxff_user_agent_id ;>>>
let sqls = factTablePopulationSQL FullPopulation config pageViewFact
>>>
mapM_ (putStr . Text.unpack) sqls
insert into fact_page_view_by_minute (created_at_minute_id, publisher_id, view_count, referrer_id, page_type_id, geo_id, user_agent_id) select cast(floor(extract(epoch from page_view_events.created_at) / 60) as bigint) as xxff_created_at_minute_id, page_view_events.publisher_id as xxff_publisher_id, count(*) as xxff_view_count, coalesce(page_view_events.referrer_id,-1) as xxff_referrer_id, coalesce((select id from dim_page_type as dim_page_type where dim_page_type.page_type = page_view_events.page_type),-1) as xxff_page_type_id, coalesce((select id from dim_geo as dim_geo where dim_geo.country_name = coalesce(session_events.geo_country_name,'__UNKNOWN_VAL__') and dim_geo.city_name = coalesce(session_events.geo_city_name,'__UNKNOWN_VAL__') and dim_geo.continent_name = coalesce(session_events.geo_continent_name,'__UNKNOWN_VAL__')),-1) as xxff_geo_id, coalesce((select id from dim_user_agent as dim_user_agent where dim_user_agent.browser_name = coalesce(session_events.browser_name,'__UNKNOWN_VAL__') and dim_user_agent.os = coalesce(session_events.os,'__UNKNOWN_VAL__') and dim_user_agent.name = coalesce(session_events.user_agent_name,'__UNKNOWN_VAL__')),-1) as xxff_user_agent_id from page_view_events left outer join session_events on page_view_events.session_event_id = session_events.id and page_view_events.publisher_id = session_events.publisher_id where page_view_events.created_at < ? group by xxff_created_at_minute_id, xxff_publisher_id, xxff_referrer_id, xxff_page_type_id, xxff_geo_id, xxff_user_agent_id ;
dimensionTablePopulationStatement :: TablePopulationMode -> Config -> Fact -> TableName -> Statement Source #
factTablePopulationStatements :: TablePopulationMode -> Config -> Fact -> [Statement] Source #