Leif Harald Karlsen
Saturation: A data transformation within a format and structure that only adds data derivable (according to some semantics) or otherwise computable from the source data
Sometimes also called derivation, however we have used this only with respect to reasoning and semantics.
# Find all ancestors of Bob living in Oslo.
SELECT ?ancestor_name
WHERE {
ex:bob (ex-o:hasFather|ex-o:hasMother)+ ?ancestor .
?ancestor ex-o:hasName ?ancestor_name ;
ex-o:address ?address .
FILTER REGEX(?address, "^.*Oslo$")
}
# versus
SELECT ?ancestor_name
WHERE {
ex:bob ex-o:hasAncestor [ ex-o:hasName ?ancestor_name ;
ex-o:addressCity "Oslo" ] .
}
orders(order_id, customer_id, product_id, order_date, required_date)
countries(gid, geom)
column with geometries for countriesmake init
text
values'Problemveien 13, 0123, Oslo'
Problemveien
), zipcode
(0123
), city (Oslo
), etc.quantities_per_unit
column in
products
, we have values of the form: 16 - 2 kg boxes
18 - 500 g pkgs.
10 boxes x 12 pieces
12 - 200 ml jars
15 - 625 g jars
24 pkgs. x 4 pieces
16 kg pkg.
10 boxes x 30 bags
12 - 100 g bars
20 - 450 g glasses
24 - 500 ml bottles
24 - 250 g pkgs.
500 ml
36 boxes
...
g
,
kg
, ml
, etc.) and how it is packaged
(boxes
, bottles
, etc.)make shipping_packaging
):CREATE VIEW packaging(product_id, packaging) AS
WITH
words AS ( -- Splits each packaging word into a separate row, with the product's id
SELECT
product_id,
regexp_split_to_table(btrim(regexp_replace(quantity_per_unit, '(\d+)|(-)|(\sx\s)', ' ', 'g'), ' '), '\s+') AS word
FROM products
)
SELECT
product_id,
word AS packaging
FROM words
WHERE word NOT IN (SELECT unit FROM units) -- Drop units (these are contained in the utility table units)
AND length(word) > 1; -- Drop single letters
cloud_coverage=90
-> sky=cloudy
Need to put products into price categories (expensive
,
midrange
, cheap
)
(make shipping_expensiveness
):
Need to know whether countries are neighboring
(make shipping_spatial
):
CREATE VIEW neighbors(country, neighbor) AS
SELECT c1.country_name, c2.country_name
FROM countries AS c1
JOIN countries AS c2 ON (st_intersects(c1.geom, c2.geom)); -- Spatial intersection
CREATE VIEW not_neighbors(country1, country2) AS
SELECT c1.country_name, c2.country_name
FROM countries AS c1, countries AS c2
WHERE (c1.country_name, c2.country_name) NOT IN (SELECT country, neighbor FROM neighbors);
Also want to know from where to where each order is going and other attributes (simple denormalization):
CREATE VIEW shipment(order_id, product_id, from_country, to_country, expensiveness) AS
SELECT o.order_id,
o.product_id,
s.country AS from_country,
c.country AS to_country,
ex.expensiveness
FROM orders AS o
JOIN customers AS c USING (customer_id)
JOIN products AS p USING (product_id)
JOIN suppliers AS s USING (supplier_id)
JOIN expensiveness AS ex USING (product_id);
addresses ->
(street, house number, city) ->
CREATE VIEW street(name, num_houses, num_people) AS
WITH
streets AS (
SELECT DISTINCT btrim(regexp_replace(home_address, '\d+', '')) AS name
FROM suburbia.home
),
num_houses AS (
SELECT s.name, count(home_address) AS num_houses
FROM streets AS s, suburbia.home AS h
WHERE h.home_address LIKE '%' || s.name || '%'
GROUP BY s.name
),
num_people AS (
SELECT s.name, count(DISTINCT cid) AS num_people
FROM streets AS s, suburbia.home AS h, suburbia.citizen AS c
WHERE h.home_address LIKE '%' || s.name || '%' AND c.home = h.hid
GROUP BY s.name
)
SELECT s.name, nh.num_houses, np.num_people
FROM streets AS s JOIN num_houses AS nh USING (name)
JOIN num_people AS np USING (name);
Need to know in which countries we have warehouses
(make shipping_warehouses
):
CREATE VIEW warehouse(warehouse_id, country) AS
SELECT row_number() OVER (ORDER BY country) AS warehouse_id,
country
FROM (
SELECT country
FROM customers
GROUP BY country
HAVING count(*) > 4 -- More than 4 customers
UNION
SELECT country_name
FROM countries
WHERE st_area(geom) > 50 -- Large area
) AS t;
Lets add “same packaging” and finally, which orders can be shipped
with which, as Lore-rules (make shipping_rules
):
CREATE RELATION package_type(product_id smallint, ptype text);
package_type(pid, 'boxed') <- packaging(pid, pkg)
: pkg IN ('boxes', 'box', 'pkg', 'pkgs', 'pkgs.', 'bars', 'rounds', 'pies');
package_type(pid, 'bottled') <- packaging(pid, pkg)
: pkg IN ('glasses', 'bottles', 'bottle', 'jars');
package_type(pid, 'canned') <- packaging(pid, pkg)
: pkg IN ('tins', 'cans');
CREATE RELATION ship_with(oid1 int, oid2 int);
ship_with(oid1, oid2) <-
shipment(oid1, pid1, ship_from, ship_to, ex), -- Shipping from and to the same
shipment(oid2, pid2, ship_from, ship_to, ex), -- and same expensiveness
package_type(pid1, pt), package_type(pid2, pt), -- and same packaging,
warehouse(w1, ship_from), warehouse(w2, ship_to), -- from and to has warehouses,
not_neighbors(ship_to, ship_from); -- from and to are not neighbors
-- Finally make view with all groups of orders that can be shipped together
-- Note: ship_with-relation is reflexive, symmetric, transitive, i.e. an equivalence-relation
-- (thus partitions the orders into equivalence classes that can be shipped together)
CREATE VIEW shipping_group(sgroup) AS
SELECT *
FROM ( -- Aggregate orders that can be shipped together into arrays
SELECT DISTINCT array_agg(DISTINCT oid2 ORDER BY oid2) AS sgroup
FROM ship_with
GROUP BY oid1 -- oid1 will also be in the array, as ship_with is reflexive
) AS t
WHERE array_length(sgroup, 1) > 1 -- Drop single-element groups
ORDER BY array_length(sgroup, 1) DESC;
owl:sameAs
), types
(rdf:type
, rdfs:subClassOf
), relationships
(rdfs:subPropertyOf
, family-relationships), etc.ex:packaging
to relate products
(ex:Product
) to their package type word
(boxes
, glasses
, etc.)ex:price
to relate products (ex:Product
)
to their price# Introduce each packaging type as a class
ex:Cheap a owl:Class ; rdfs:subClassOf ex:Product .
ex:Midrange a owl:Class ; rdfs:subClassOf ex:Product .
ex:Expensive a owl:Class ; rdfs:subClassOf ex:Product .
# State that every product is exactly one of these types
[] owl:allDisjoint ( ex:Cheap ex:Midrange ex:Expensive ) .
ex:Product rdfs:subClassOf ex:Cheap or ex:Midrange or ex:Expensive .
# Define the classes in terms of the price
ex:price some (integer < 10) rdfs:subClassOf ex:Cheap .
ex:price some (integer >= 10 and integer < 50) rdfs:subClassOf ex:Midrange .
ex:price some (integer >= 50) rdfs:subClassOf ex:Expensive .
# Introduce each packaging type as a class
ex:Boxed a owl:Class ; rdfs:subClassOf ex:Order .
ex:Bottled a owl:Class ; rdfs:subClassOf ex:Order .
ex:Canned a owl:Class ; rdfs:subClassOf ex:Order .
# State that every order is exactly one of these types
[] owl:allDisjoint ( ex:Boxed ex:Bottled ex:Canned ) .
ex:Product rdfs:subClassOf ex:Bottled or ex:Bottled or ex:Canned .
# Define the classes in terms of the packaging word
ex:packaging some {"boxes", "box", "pkg.", "pkgs."} rdfs:subClassOf ex:Boxed .
ex:packaging some {"glasses", "bottles", "bottle", "jars"} rdfs:subClassOf ex:Bottled .
ex:packaging some {"tins", "cans"} rdfs:subClassOf ex:Canned .
-- Given hourly temperature forecasts:
CREATE TABLE temp_1h(lat float, lon float, time timestamp, temp float);
INSERT INTO temp_1h VALUES
(59.1, 10.2, '2022-03-22 12:00:00', 8.0),
(59.1, 10.2, '2022-03-22 13:00:00', 10.3),
(59.1, 10.2, '2022-03-22 14:00:00', 11.1),
(59.1, 10.2, '2022-03-22 15:00:00', 12.5),
(60.2, 10.5, '2022-03-22 12:00:00', 2.1),
(60.2, 10.5, '2022-03-22 13:00:00', 5.2),
(60.2, 10.5, '2022-03-22 14:00:00', 6.1),
(60.2, 10.5, '2022-03-22 15:00:00', 8.3);
CREATE VIEW temp_30m(lat, lon, time, temp) AS
WITH
temps(lat, lon, time, temp) AS (
SELECT t1.lat, t1.lon, t1.time + '30 min'::interval, (t1.temp + t2.temp)/2
FROM temp_1h AS t1 JOIN temp_1h AS t2
ON (t1.lat = t2.lat AND t1.lon = t2.lon AND t2.time = (t1.time + '1 hour'::interval))
UNION ALL
SELECT *
FROM temp_1h
)
SELECT * FROM temps
ORDER BY lat, lon, time;
owl:sameAs