Data working group/Redactions/Log
Adding to this page will not get something redacted. You only want to add to here if you've generated a list of changesets of objects that have been redacted
May google cleanup
Goal: Remove google sourced data from user 227972
first pass
data that can be removed cleanly. needed: list of ways and nodes which have no parents ways or child nodes outside thatlist.
get preliminary list
CREATE TABLE redactable_ways
AS SELECT DISTINCT w.*
FROM ways w
JOIN way_nodes child_wn ON (w.id=child_wn.way_id) -- entries for every child node
LEFT JOIN nodes child_n ON (child_wn.node_id = child_n.id
AND child_n.version > 1) -- get null rows for v1 child nodes
WHERE w.user_id=227972 -- criteria
AND w.tags @> hstore('source','google')
AND w.version=1
AND child_n.id IS NULL; -- i.e. couldn't LEFT JOIN #1, so v1 child node
ANALYZE redactable_ways;
then run this until it isn't deleting to get rid of ways that have child nodes with parents outside the set
DELETE FROM redactable_ways
WHERE redactable_ways.id IN (SELECT DISTINCT w.id
FROM redactable_ways w
JOIN way_nodes child_wn ON (w.id=child_wn.way_id) -- entries for every child node
JOIN way_nodes remove_wn ON (child_wn.node_id=remove_wn.node_id)
LEFT JOIN redactable_ways remove_w ON (remove_wn.way_id=remove_w.id)
WHERE
remove_w.id IS NULL);
set formatting and output
\pset t on
\o pass1.txt
SELECT 'w' || id || 'v' || version FROM redactable_ways;
SELECT DISTINCT
ON (n.id)
'n' || n.id || 'v' || n.version
FROM redactable_ways w
JOIN way_nodes wn ON (w.id = wn.way_id)
JOIN nodes n ON (wn.node_id = n.id);
Post-process to remove whitespace, redact list
second pass
all nodes with tags @> hstore('source','google') AND user_id=227972, all versions of those nodes.
third pass
as with first, but without v1 restriction
CREATE TEMPORARY TABLE redactable_ways
AS SELECT DISTINCT w.*
FROM ways w
WHERE w.user_id=227972 -- criteria
AND w.tags @> hstore('source','google');
ANALYZE redactable_ways;
DELETE FROM redactable_ways
WHERE redactable_ways.id IN (SELECT DISTINCT w.id
FROM redactable_ways w
JOIN way_nodes child_wn ON (w.id=child_wn.way_id) -- entries for every child node
JOIN way_nodes remove_wn ON (child_wn.node_id=remove_wn.node_id)
LEFT JOIN redactable_ways remove_w ON (remove_wn.way_id=remove_w.id)
WHERE
remove_w.id IS NULL);
\pset t on
\o pass3.txt
SELECT 'w' || id || 'v' || generate_series(1,version) FROM redactable_ways;
SELECT 'n' || n.id || 'v' || generate_series(1,n.version)
FROM redactable_ways w
JOIN way_nodes wn ON (wn.way_id = w.id)
JOIN nodes n ON (wn.node_id = n.id)
ALK
ALK data redacted under id 14 on Aug 27, 2013, based on a changeset list from Henning.
GADM Ecuador
GADM-sourced admin boundaries in Ecuador.
first pass of admin_level=8
\pset t on
\o pass1.txt
SELECT
DISTINCT changeset_id
FROM relations
WHERE tags ? 'municipality_code'
AND tags @> hstore('source','www.gadm.org')
AND tags @> hstore('admin_level','8')
AND version = 1;
Manually identified
using overpass-turbo
<osm-script output="json">
<union>
<query type="relation">
<has-kv k="municipality_code"/>
<has-kv k="source" v="www.gadm.org"/>
</query>
</union>
<print mode="body"/>
<recurse type="down"/>
<print mode="skeleton"/>
</osm-script>
Manual redaction of
14332956 14340423 14356744 14331339 4506955 14346092 14336286 14399739 14334521 14335205
Brazil: Eduardo Francis Batista
User ID 45375 Complete removal of contributions
Skip 3246547 3233456
Google imagery_used
SELECT user_name, string_agg(id::text, ' ') AS ids FROM osm_changeset WHERE tags?'imagery_used' AND (tags->'imagery_used' LIKE '%google.com%' OR tags->'imagery_used' LIKE '%google.ru%' OR tags->'imagery_used' LIKE '%google.pl%' OR tags->'imagery_used' LIKE '%google.cz%') AND id > 18440203 GROUP BY user_name ORDER BY ids DESC;
max changeset in DB was 32592301
with changeset_md database updated to 131017
\pset t on
\o changesets.txt
SELECT id FROM osm_changeset
WHERE tags?'imagery_used' AND (tags->'imagery_used' LIKE '%google.com%' OR tags->'imagery_used' LIKE '%google.ru%')
ORDER BY id DESC;
Latest changeset: 18440203 Earliest changeset: 17293725
\pset t ON
\o changesets2.txt
SELECT id FROM osm_changeset
WHERE tags?'imagery_used'
AND NOT (tags->'imagery_used' LIKE '%google.com%' OR tags->'imagery_used' LIKE '%google.ru%')
AND (tags->'imagery_used' LIKE '%googleapis.com%' OR tags->'imagery_used' LIKE '%google.pl%' OR tags->'imagery_used' LIKE '%google.cz%')
ORDER BY id DESC;
Latest changeset: 18383297 Earliest changeset: 17975373
Bolivia GADM
First pass (changeset-based)
COPY (
SELECT changeset_id
FROM nodes n
JOIN (
SELECT DISTINCT unnest(w.nodes) AS node_id
FROM relations r
JOIN relation_members rm ON (rm.relation_id = r.id)
JOIN ways w ON (w.id = rm.member_id AND rm.member_type='W')
WHERE r.tags @> hstore('source','gadm.org')
AND r.tags @> hstore('boundary','administrative')
AND ST_IsValid(w.linestring)
AND ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-70,-23),ST_Point(-57,-9)),4326), w.linestring)
) AS wn ON (wn.node_id = n.id)
WHERE n.id > 616295125 -- earliest node ID
AND n.id < 663658537 -- latest node ID
AND changeset_id >=3651611 -- first CS
AND changeset_id <=4088167 -- last CS
AND n.user_id = 77114
GROUP BY changeset_id
HAVING COUNT(*) > 10
ORDER BY changeset_id ASC) TO STDOUT
Import into changeset db, join against, filter to only have JOSM changsets, re-output.
2nd pass, object-based
COPY (
WITH
rrm AS ( -- relations and way members
SELECT DISTINCT r.id, r.version, rm.member_id
FROM relations r
JOIN relation_members rm ON (rm.relation_id = r.id)
JOIN ways w ON (w.id = rm.member_id AND rm.member_type='W')
WHERE r.tags @> hstore('source','gadm.org')
AND r.tags @> hstore('boundary','administrative')
AND r.tags @> hstore('admin_level','8')
AND ST_IsValid(w.linestring)
AND ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-70,-23),ST_Point(-57,-9)),4326), w.linestring)),
rw AS (
SELECT DISTINCT w.*
FROM rrm
JOIN ways w ON (w.id = rrm.member_id)
WHERE w.changeset_id >= 3651611), -- get rid of unmodified ways
rwn AS (SELECT DISTINCT rw.id, rw.version, rw.changeset_id, unnest(rw.nodes) AS node_id FROM rw),
rn AS (
SELECT DISTINCT n.id, n.version
FROM rwn
JOIN nodes n ON (rwn.node_id = n.id)
WHERE
n.id BETWEEN 616624000 AND 663659000
AND n.changeset_id BETWEEN 3651610 AND 4088168) -- assume that modified nodes have been moved and are clean
SELECT 'n'||id||'v'||generate_series(1,version) AS obj FROM rn) TO STDOUT;
Redact list
COPY (
WITH
rrm AS ( -- relations and way members
SELECT DISTINCT r.id, r.version, rm.member_id
FROM relations r
JOIN relation_members rm ON (rm.relation_id = r.id)
JOIN ways w ON (w.id = rm.member_id AND rm.member_type='W')
WHERE r.tags @> hstore('source','gadm.org')
AND r.tags @> hstore('boundary','administrative')
AND r.tags @> hstore('admin_level','8')
AND ST_IsValid(w.linestring)
AND ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-70,-23),ST_Point(-57,-9)),4326), w.linestring)),
rw AS (
SELECT DISTINCT w.*
FROM rrm
JOIN ways w ON (w.id = rrm.member_id)
WHERE w.changeset_id >= 3651611), -- get rid of unmodified ways
rwn AS (SELECT DISTINCT rw.id, rw.version, rw.changeset_id, unnest(rw.nodes) AS node_id FROM rw),
rn AS (
SELECT DISTINCT n.id, n.version
FROM rwn
JOIN nodes n ON (rwn.node_id = n.id)
WHERE
n.id BETWEEN 616624000 AND 663659000
AND n.changeset_id > 3651610
AND n.version > 1)
SELECT COUNT(*) FROM rn; 'n'||rn.id||'v'||generate_series(1,rn.version-1) AS obj
FROM rn ) TO STDOUT;
Redact list
COPY (
WITH
rrm AS ( -- relations and way members
SELECT DISTINCT r.id, r.version, rm.member_id
FROM relations r
JOIN relation_members rm ON (rm.relation_id = r.id)
JOIN ways w ON (w.id = rm.member_id AND rm.member_type='W')
WHERE r.tags @> hstore('source','gadm.org')
AND r.tags @> hstore('boundary','administrative')
AND r.tags @> hstore('admin_level','8')
AND ST_IsValid(w.linestring)
AND ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-70,-23),ST_Point(-57,-9)),4326), w.linestring)),
rw AS (
SELECT DISTINCT w.*
FROM rrm
JOIN ways w ON (w.id = rrm.member_id)
WHERE w.changeset_id >= 3651611), -- get rid of unmodified ways
rw_with_kept_parents AS (
SELECT rw.*
FROM rw
JOIN relation_members rm ON (rw.id = rm.member_id AND rm.member_type='W')
LEFT JOIN rrm ON (rm.relation_id = rrm.id)
WHERE rrm.id IS NULL)
SELECT 'w'||rw.id||'v'||generate_series(1,rw.version) AS obj FROM rw LEFT JOIN rw_with_kept_parents USING (id) WHERE rw_with_kept_parents.id IS NULL ) TO STDOUT;
Redact list
HERE
SELECT id FROM osm_changeset
WHERE tags?'imagery_used' AND (tags->'imagery_used') ilike '%here%' order by id;
max ID 21463494
Google imagery_used 2
SELECT id,user_name,tags->'imagery_used' as imagery_used FROM osm_changeset
WHERE tags?'imagery_used'
AND (tags->'imagery_used' LIKE '%googleapis.com%' OR tags->'imagery_used' LIKE '%google.%')
AND id > 18440203
ORDER BY id asc;
max ID 20479773
WDPA
COPY (
SELECT changeset_id
FROM nodes n
JOIN (
SELECT DISTINCT unnest(w.nodes) AS node_id
FROM relations r
JOIN relation_members rm ON (rm.relation_id = r.id)
JOIN ways w ON (w.id = rm.member_id AND rm.member_type='W')
WHERE r.tags @> hstore('source','www.wdpa.org')
AND ST_IsValid(w.linestring)
AND w.changeset_id = r.changeset_id
AND r.version=1
AND w.version=1
) AS wn ON (wn.node_id = n.id)
WHERE user_id = 77114
AND version = 1
GROUP BY changeset_id
HAVING COUNT(*) > 75
ORDER BY changeset_id ASC)
TO STDOUT;
cladr / КЛАДР
TODO - see [EN:КЛАДР]
In essence:
All tags matching cladr:* contributed by the user KekcuHa (ran as an import bot)
== Google imagery_used 3 Using the regex from API capabilities
\pset t on
\o changesets.txt
SELECT id FROM osm_changeset
WHERE tags?'imagery_used'
AND tags->'imagery_used' ~ '.*\.google(apis)?\..*/(vt|kh)[\?/].*([xyz]=.*){3}.*'
AND id > 20479773
ORDER BY id DESC;
Max ID 44588069