wiki:DatabaseCleanup

Database cleanup queries

All entities with more than one CURRENT_VERSION:

select count(*) as cnt, id, node_type from openmind.node
where system_status = 'CURRENT_VERSION'
group by id
having cnt > 1
order by cnt desc;

All entities with more than one node_type:

select count(distinct node_type) as cnt, id from openmind.node
group by id
having cnt > 1
order by 1 desc;

All entities with more than one object_class:

select count(distinct object_class) as cnt, id from openmind.node
group by id
having cnt > 1
order by 1 desc;

All relations without source or target:

select * from openmind.node where node_type = 'RELATION' and (source_id is null or target_id is null);

All relations with target pointing at nonexistent entity:

select * from openmind.node rel 
left outer join openmind.node ent
on rel.target_id = ent.id
where 
rel.node_type = 'RELATION'
and ent.id is null;

All current relations pointing at non-current target entities:

select * from openmind.node rel, openmind.node ent 
where 
rel.target_id = ent.id
and rel.target_modif = ent.modification_time
and rel.system_status = 'CURRENT_VERSION'
and ent.system_status != 'CURRENT_VERSION';

All relations that have no current version (could be deleted):

select id from openmind.node
where system_status = 'PREVIOUS_VERSION'
and node_type = 'RELATION'
and id not in
(select distinct id from openmind.node
where system_status = 'CURRENT_VERSION')

All non-current relations pointing at current entities:

select * from openmind.node rel, openmind.node src, openmind.node tar
where 
rel.target_id = tar.id
and rel.source_id = src.id
and rel.source_modif = src.modification_time
and rel.target_modif = tar.modification_time
and rel.system_status = 'PREVIOUS_VERSION'
and src.system_status = 'CURRENT_VERSION'
and tar.system_status = 'CURRENT_VERSION'
order by rel.id;

The number of attributes without source_id (where a different version has a source_id) grouped by source object type.

select count(*), source_obj_class from openmind.node
where 
node_type = 'ATTRIBUTE'
and 
source_id is not null
and id in
(select distinct id from openmind.node
where source_id is null
and node_type = 'ATTRIBUTE')
group by source_obj_class

The number of entries modified in a date range group by type.

select count(*), node_type from node
where 
modification_time > unix_timestamp('2016-07-27')*1000
and 
modification_time < unix_timestamp('2016-07-28')*1000
group by node_type

The number of REFERENCEs without attributes but with relations grouped by date.

select count(*), from_unixtime(modification_time/1000, '%Y-%m-%d')
from node e
where 
e.object_class = 'REFERENCE'
and system_status = 'CURRENT_VERSION'
and e.id not in (
select source_id from node
where node_type = 'ATTRIBUTE'
)
and e.id in (
select source_id from node
where node_type = 'RELATION'
)
group by from_unixtime(modification_time/1000, '%Y-%m-%d')
order by 2

DANGEROUS: Set system_status of every last version of is_reference_of relations with target 74095 to CURRENT_VERSION. (Needs two nested subselects and safe_mode=off)

update node
set system_status = 'CURRENT_VERSION'
-- select * from node
where (id, version) in 
(
select * from
(
select id, max(version)
from node 
where 
target_id = 74095
and object_class = 'is_reference_of'
group by id
) as d
)
Last modified 7 years ago Last modified on Jun 9, 2017, 2:28:51 PM