Grafoscopio/src/Dataviz/OffshoreLeaksDB.class.st

623 lines
20 KiB
Smalltalk

"
I'm used to model, query and visualice the released information by the
International Consortium of Investigative Journalism (ICIJ).
The information downloaded and used here is based on the original
information available at
https://offshoreleaks.icij.org/about/download
"
Class {
#name : #OffshoreLeaksDB,
#superclass : #Object,
#classInstVars : [
'dataLocation',
'database'
],
#category : #'Dataviz-PanamaPapers'
}
{ #category : #'data visualization' }
OffshoreLeaksDB class >> addColorConventionsTo: aView [
"I draw a label box to explain the color conventios"
| labelsBox |
labelsBox := self colorCoventions.
labelsBox view: aView.
labelsBox textSize: 20.
labelsBox build.
labelsBox legendElement translateTo: 355@885.
^ aView.
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> choroplethWorldMap [
"I draw a choropleth world map where the intensity of color is given
according to the amount of registered offshores in each territory"
| view |
view := RTView new.
view @ RTDraggableView @ RTZoomableView.
self mappedTerritories do: [ :territory |
| elem |
elem := (RTSVGPath new
path: (territory map);
fillColor: (self colorFor: territory totalOffshores in: self colorPalette);
borderColor: Color black;
scale: 1)
element model: (
territory name, String cr,
"territory totalOffshores asString"
(self totalOffshoresAsStringFor: territory)).
elem @ RTPopup.
view add: elem.
].
^ view
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> choroplethWorldMapFor: territories [
"I draw a choropleth world map where the intensity of color is given
according to the amount of registered offshores in each territory"
| view |
view := RTView new.
view @ RTDraggableView @ RTZoomableView.
territories do: [ :territory |
| elem |
elem := (RTSVGPath new
path: (territory map);
fillColor: (self colorFor: territory totalOffshores in: self colorPalette);
borderColor: Color black;
scale: 1)
element model: (
territory name, String cr,
"territory totalOffshores asString"
(self totalOffshoresAsStringFor: territory)).
elem @ RTPopup.
view add: elem.
].
^ view
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> choroplethWorldMapFull [
"I draw a label box to explain the color conventios"
^ self addColorConventionsTo: self choroplethWorldMap.
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> choroplethWorldMapQuick [
| dataView |
dataView := FileSystem disk workingDirectory / 'territories.ston'.
dataView exists
ifFalse: [
self downloadTerritoriesDataView.
^ self addColorConventionsTo: (self choroplethWorldMapFor: self importTerritoriesData)
]
ifTrue: [ ^ self addColorConventionsTo: (self choroplethWorldMapFor: self importTerritoriesData) ]
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> colorCoventions [
"I draw a label box to explain the color conventios"
| title labelsBox labels |
title := 'Offshores amount by color'.
labels := #('Country not found in database'
'Between 1 and 9'
'Between 10 and 99'
'Between 100 and 999'
'Between 1000 and 9999'
'Between 10.000 and 99.999').
labelsBox := RTLegendBuilder new.
labelsBox addText: title.
labels do: [ :label |
labelsBox addColor: (self colorPalette at: (labels indexOf: label)) text: label].
^ labelsBox
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> colorFor: anIntegerOrNil in: aColorPalette [
anIntegerOrNil isNil | (anIntegerOrNil = 0) ifTrue: [ ^ aColorPalette at: 1 ].
(anIntegerOrNil between: 1 and: 9) ifTrue: [ ^ aColorPalette at: 2 ].
(anIntegerOrNil between: 10 and: 99) ifTrue: [ ^ aColorPalette at: 3 ].
(anIntegerOrNil between: 100 and: 999) ifTrue: [ ^ aColorPalette at: 4 ].
(anIntegerOrNil between: 1000 and: 9999) ifTrue: [ ^ aColorPalette at: 5 ].
(anIntegerOrNil between: 10000 and: 99000) ifTrue: [ ^ aColorPalette at: 6 ].
^ Color black.
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> colorPalette [
| startPalette endPalette |
startPalette := RTColorPalette sequential colors: 9 scheme:'Oranges'.
endPalette := Array new: 6.
endPalette
at: 1 put: Color veryVeryLightGray;
at: 2 put: (startPalette at: 2);
at: 3 put: (startPalette at: 3);
at: 4 put: (startPalette at: 5);
at: 5 put: (startPalette at: 7);
at: 6 put: (startPalette at: 9).
^ endPalette.
]
{ #category : #'data visualization' }
OffshoreLeaksDB class >> colorfulWorldMap [
"I'm just a test to see if the world map is working"
| view |
view := RTView new.
self mappedTerritories do: [ :territory |
| elem |
elem := (RTSVGPath new
path: (territory map);
fillColor: Color random;
borderColor: Color black;
scale: 1) element model: (territory name).
elem @ RTPopup.
view add: elem.
].
^ view
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> countriesWithOffshores [
"I provide a list of the countries with offshore where the names of the countries
has been processed to match the ones that are known by Roassal"
^ self totalOffshoresByCountry keys
]
{ #category : #'data cleaning' }
OffshoreLeaksDB class >> countryNameReplacements [
"I take names as they appear in the database and translated to how they appear in
the worldmap."
| tmp1 |
tmp1 := Dictionary new.
tmp1
add: 'Antigua and Barb.' -> 'Antigua and Barbuda';
add: 'Bahamas' -> 'Bahamas, The';
add: 'Bosnia and Herz.' -> 'Bosnia and Herzegovina';
add: 'Bolivia, Plurinational State of' -> 'Bolivia';
add: 'Brunei Darussalam' -> 'Brunei';
add: 'Cayman Is.' -> 'Cayman Islands';
add: 'Congo, the Democratic Republic of the' -> 'Dem. Rep. Congo';
add: 'Czech Rep.' -> 'Czech Republic';
add: 'Dominican Rep.' -> 'Dominican Republic';
add: 'Korea, Democratic People''s Republic of' -> 'North Korea';
add: 'Korea, Republic of' -> 'South Korea';
add: 'Fr. Polynesia' -> 'French Polynesia';
add: 'Iran, Islamic Republic of' -> 'Iran';
add: 'Lao People''s Democratic Republic' -> 'Laos';
add: 'Macedonia, the Former Yugoslav Republic of' -> 'Macedonia';
add: 'Moldova, Republic of' -> 'Moldova';
add: 'Netherlands' -> 'The Netherlands';
add: 'Palestine, State of' -> 'Palestine';
add: 'Russian Federation' -> 'Russia';
add: 'Syrian Arab Republic' -> 'Syria';
add: 'Sint Maarten (Dutch part)' -> 'Saint Martin';
add: 'St. Kitts and Nevis' -> 'Saint Kitts and Nevis';
add: 'St. Vin. and Gren.' -> 'Saint Vincent and the Grenadines';
add: 'Tanzania, United Republic of' -> 'Tanzania';
add: 'Taiwan, Province of China' -> 'Taiwan';
add: 'Turks and Caicos Is.' -> 'Turks and Caicos Islands';
add: 'Viet Nam' -> 'Vietnam';
add: 'Venezuela, Bolivarian Republic of' -> 'Venezuela'.
^ tmp1 yourself
]
{ #category : #accessing }
OffshoreLeaksDB class >> dataLocation [
(FileLocator documents / 'Grafoscopio') ensureCreateDirectory.
(FileLocator documents / 'Grafoscopio' / 'Projects') ensureCreateDirectory.
(FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers') ensureCreateDirectory.
(FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers' / 'Data' ) ensureCreateDirectory.
dataLocation := FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers' / 'Data' / 'offshore-leaks.sqlite'.
^ dataLocation
]
{ #category : #accessing }
OffshoreLeaksDB class >> dataLocation: anObject [
dataLocation := anObject
]
{ #category : #accessing }
OffshoreLeaksDB class >> database [
database := UDBCSQLite3Connection on: dataLocation fullName.
^ database
]
{ #category : #accessing }
OffshoreLeaksDB class >> database: anObject [
database := anObject
]
{ #category : #metadata }
OffshoreLeaksDB class >> databaseMetaData [
"I define some metadata associated to the introductory document."
| metadata |
metadata := Dictionary new
at: 'type' put: 'Database';
at: 'shorcut' put: 'database';
at: 'website' put: 'https://datahub.io/dataset/panama-papers';
at: 'sha1' put: 'ebb8290bbaca3b32d98e1a15926c93c3a468e7eb';
at: 'downloadUrl' put: 'https://datahub.io/dataset/06f27df3-ec88-47ea-b428-7ec138f7835e/resource/50a9bda8-e44a-4aac-b265-d07fabde5612/download/offshore-leaks.sqlite.zip';
at: 'size' put: 54488249;
yourself.
^ metadata
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> databaseMetrics [
"I return some metrics like table size in rows for all tables in the database"
| queryResults answer partial |
self dataLocation exists
ifFalse: [ self updateDatabase ]
ifTrue: [
queryResults := self tablesNames collect: [:tableName |
partial := (self database open execute: 'SELECT Count(*) AS size FROM ', tableName) rows collect: [:each | each data ].
(partial at: 1)
at: 'table' put: tableName;
yourself. ].
self database isOpen ifTrue: [ self database close ].
"Simplifiying the dictionary with the answers"
answer := Dictionary new.
queryResults do: [ :entry |
answer at: (entry at: 'table') put: (entry at: 'size') ].
^ answer]
]
{ #category : #initialization }
OffshoreLeaksDB class >> defineDocumentation [
"I model the important documents for this project."
| gfcDocumentation |
gfcDocumentation := GrafoscopioDocumentation new.
gfcDocumentation
name: 'offshoreLeaks';
repository: (FossilRepo new remote: 'http://mutabit.com/repos.fossil/panama-papers');
localPlace: FileLocator workingDirectory asFileReference
/'Grafoscopio'/'Packages'/'Dataviz'/ 'OffshoreLeaks'.
gfcDocumentation documents
add: 'panama-papers.ston';
add: 'territories.ston'.
gfcDocumentation localPlace.
^ gfcDocumentation
]
{ #category : #updating }
OffshoreLeaksDB class >> docDownloadFor: aDocumentType [
"I download the interactive documentation in STON format, according to the document
type which can be: 'tutorial' or 'manual'.
If a the documentation is already present in the system I made a temporal backup and
download a new copy"
| docInfo rootFolder localDoc temporalBackup remoteDoc |
(aDocumentType = 'intro') ifTrue: [ docInfo := self introMetaData ].
rootFolder := (self dataLocation parent parent).
localDoc := rootFolder fullName, '/', (docInfo at: 'relativePath'), (docInfo at: 'filename').
temporalBackup := rootFolder fullName, '/', (docInfo at: 'relativePath'), aDocumentType, '.temp.ston'.
remoteDoc :=
(docInfo at: 'remoteRepo'), 'doc/tip/',
(docInfo at: 'relativePath'), (docInfo at: 'filename').
localDoc asFileReference exists
ifTrue: [
temporalBackup asFileReference exists ifTrue: [ temporalBackup asFileReference delete].
localDoc asFileReference renameTo: aDocumentType, '.temp.ston'
].
GrafoscopioBrowser
downloadingFrom: remoteDoc
withMessage: 'Updating: ', aDocumentType,'...'
into: (rootFolder fullName, '/', (docInfo at: 'relativePath')).
]
{ #category : #updating }
OffshoreLeaksDB class >> downloadDatabase [
"I download the data of the panama papers from its page at the DataHub community repository:
https://datahub.io/dataset/panama-papers
"
| advancement currentSize |
(self dataLocation parent / 'offshore-leaks.sqlite.zip') ensureDelete.
advancement := 0.
[[ :bar |
bar title: 'Downloading database...'.
[
ZnClient new
url: (self databaseMetaData at: 'downloadUrl');
signalProgress: true;
downloadTo: self dataLocation parent
]
on: HTTPProgress
do: [ :progress |
(FileLocator temp / 'offshore-leaks.sqlite.zip') exists
ifTrue: [
currentSize := (FileLocator temp / 'offshore-leaks.sqlite.zip') size.
currentSize > 0 ifTrue: [advancement := (currentSize / (self databaseMetaData at: 'size')) * 100]
].
bar current: advancement.
progress resume ] ] asJob run] fork.
]
{ #category : #updating }
OffshoreLeaksDB class >> downloadTerritoriesDataView [
"I download the data view of the Panama Papers territories for quick visualization
"
| downloadUrl dataView |
dataView := FileSystem disk workingDirectory / 'territories.ston'.
dataView exists
ifTrue: [ self inform:
'Data view already downloaded in expected location. Delete it first from ', String cr,
dataView fullName, String cr,
' before download it again' ]
ifFalse: [
downloadUrl := 'http://mutabit.com/repos.fossil/panama-papers/doc/tip/territories.ston'.
GrafoscopioDockingBar
downloadingFrom: downloadUrl
withMessage: 'Downloading data view...'
into: FileSystem disk workingDirectory
].
]
{ #category : #updating }
OffshoreLeaksDB class >> downloadWorldMap [
"I download the World Map to be used.
Original Map is courtesy of Pareto Softare, LLC DBA Simplemaps.com,
relaeased under a MIT license"
(self dataLocation parent / 'Maps' / 'world.svg') asFileReference exists
ifTrue: [ self inform:
'Worldmap already downloaded in expected location. Delete it first from ', String cr,
self dataLocation parent fullName, String cr,
' before download it again' ]
ifFalse: [
self downloadingFrom: ''
withMessage: 'Downloading worldmap'
into: (self dataLocation parent / 'Maps')]
]
{ #category : #'data export' }
OffshoreLeaksDB class >> exportTerritoriesData [
"I export the territories data a file in STON format.
Useful for quick visualizations and data exchanges without downloading the full database"
| storage |
storage := (FileSystem disk workingDirectory / 'territories.ston') ensureCreateFile.
storage writeStreamDo: [:stream |
STON put: (OffshoreLeaksDB mappedTerritories) onStreamPretty: stream ].
self inform: 'Territories data exported as ', (storage fullPath).
]
{ #category : #'data import' }
OffshoreLeaksDB class >> importTerritoriesData [
| dataFile |
dataFile := FileSystem disk workingDirectory / 'territories.ston'.
dataFile exists
ifFalse: [self inform: 'File with territories data not found' ]
ifTrue: [ ^ STON fromString: dataFile contents ]
]
{ #category : #initialization }
OffshoreLeaksDB class >> initialize [
self defineDocumentation
]
{ #category : #metadata }
OffshoreLeaksDB class >> introMetaData [
"I define some metadata associated to the introductory document."
| metadata |
metadata := Dictionary
with: 'type' -> 'Grafoscopio document'
with: 'shorcut' -> 'intro'
with: 'remoteRepo' -> 'http://mutabit.com/repos.fossil/panama-papers/'
with: 'relativePath' -> ''
with: 'filename' -> 'panama-papers.ston'.
^ metadata
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> mappedTerritories [
"I reduce the mismatch between names of territories mentioned in the Panama Papers
database and the ones in the worldmap"
| countries offshoresData |
countries := self mappedTerritoriesRaw.
offshoresData := self totalOffshoresByCountry.
countries do: [:c |
(self countryNameReplacements includesKey: c name)
ifTrue: [c name: (self countryNameReplacements at: c name)].
(offshoresData includesKey: c name)
ifTrue: [c totalOffshores: (offshoresData at: c name)]
].
^ countries
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> mappedTerritoriesRaw [
"I return a list of all mappeable territories, no matter if they have been mentioned
in the Panama Papers or not."
| xmlStream xmlDoc map mappedTerritoriesRaw i |
xmlStream := (self dataLocation parent parent / 'Maps' / 'world.svg') asFileReference contents.
xmlDoc := XMLDOMParser parse: xmlStream.
map := xmlDoc allElementsNamed: 'path'.
mappedTerritoriesRaw := OrderedCollection new.
i := 0.
map contentNodesDo: [ :n |
mappedTerritoriesRaw add: (
Territory new
iso: (n attributeAt: 'id');
name: (n attributeAt: 'data-name');
map: (n attributeAt: 'd')).
i := i + 1.
].
^ mappedTerritoriesRaw
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> oldQueries [
"I store a dictionary of the 'old queries' that were used to query
the first version of the offshores database stored at:
https://datahub.io/dataset/panama-papers
The dictioary contain as keys the name of the method where the key
was used and as value the respective query.
To see the curren implementation, browse the key.
"
| queries |
queries := Dictionary new.
queries
at: 'totalOffshoresByCountryRaw'
put: 'SELECT country_name, COUNT(*) AS "total_offshores" FROM
(SELECT country_name, Description_
FROM nodesNW
INNER JOIN node_countriesNW
ON nodesNW.Unique_ID = node_countriesNW.NODEID1
ORDER BY country_name)
GROUP BY country_name'.
^queries.
]
{ #category : #documents }
OffshoreLeaksDB class >> openIntroNotebook [
| docs |
docs := self defineDocumentation.
docs openNotebookAt: 1.
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> tablesNames [
"I return the names of the tables of the SQLite database"
| answer query |
query := 'SELECT name
FROM sqlite_master
WHERE type="table"
ORDER BY name'.
self dataLocation exists
ifFalse: [ self updateDatabase ]
ifTrue: [
answer := (self database open execute: query) rows collect: [ :each | each data ].
self database isOpen ifTrue: [ self database close ].
^ answer collect: [:e | e at: 'name' ]
]
]
{ #category : #'data cleaning' }
OffshoreLeaksDB class >> totalOffshoresAsStringFor: aTerritory [
"I retunr 0 if the total offshores for aTerritory is nil or it integer value otherwise"
aTerritory totalOffshores isNil
ifTrue: [ ^ 0 asString ]
ifFalse: [ ^ aTerritory totalOffshores asString ]
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> totalOffshoresByCountry [
"I return the total offshores by country cleaned"
| entries results |
entries := self totalOffshoresByCountryRaw.
results := Dictionary new.
entries do: [ :entry |
(self countryNameReplacements includesKey: (entry at: 'country_name'))
ifTrue: [
entry at: 'country_name'
put: (self countryNameReplacements at: (entry at: 'country_name'))].
entry at: 'country_name' put: (entry at: 'country_name')
].
entries do: [ :entry | results at: (entry at: 'country_name') put: (entry at: 'total_offshores') ].
^ results.
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> totalOffshoresByCountryRaw [
"I query for the offshores by country data from a SQLite database file"
| query answer |
query := 'SELECT countries AS "country_name", count(countries) AS "total_offshores"
FROM Addresses
GROUP BY countries'.
self dataLocation exists
ifFalse: [ self inform: 'Download database first by running: ', String cr,
'"OffshoreLeaks updateDatabase"' ]
ifTrue: [
answer := (self database open execute: query) rows collect: [ :each | each data ].
self database isOpen ifTrue: [ self database close ].
^ answer
]
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> totalOffshoresFor: aCountryName [
"I give the total amount of offshores companies for given country name.
This country is a String, that can contain whitespaces"
| result |
result := OffshoreLeaksDB totalOffshoresByCountry
select: [:entry | (entry at: 'country_name') = (aCountryName copyWithout: Character space) ].
result isEmpty
ifTrue: [ ^ nil ]
ifFalse: [ ^ (result at: 1) at: 'total_offshores' ]
]
{ #category : #'data queries' }
OffshoreLeaksDB class >> unmappedTerritories [
"I list all territories that are mentioned in the Panama Papers that are not part of the
original maps in Roassal or added into the maps of the Panama Papers class.
This keeps the code modular, but in the future Roassal should include more territories"
| unmapped mappedNames |
mappedNames := self mappedTerritories collect: [:e | e name].
unmapped := self countriesWithOffshores reject: [:eachCountry |
mappedNames includes: (eachCountry) ].
^ unmapped
]
{ #category : #updating }
OffshoreLeaksDB class >> unzipDatabase [
| zipFile |
zipFile := self dataLocation parent / 'offshore-leaks.sqlite.zip'.
zipFile exists
ifTrue: [(GrafoscopioNotebook SHA1For: zipFile is: (self databaseMetaData at: 'sha1'))
ifTrue: [
ZipArchive new
readFrom: zipFile;
extractAllTo: self dataLocation parent]
]
]
{ #category : #updating }
OffshoreLeaksDB class >> updateDatabase [
self downloadDatabase.
self dataLocation asFileReference ensureDelete.
self unzipDatabase.
]
{ #category : #updating }
OffshoreLeaksDB class >> updateDatabaseUI [
| answer |
self dataLocation asFileReference exists
ifTrue: [
answer := UIManager default
confirm:
'Database already in the system.' , String cr,
'Do you want to delete it a download a new one?'.
answer
ifFalse: [ ^ self ].
]
ifFalse: [self updateDatabase]
]
{ #category : #updating }
OffshoreLeaksDB class >> updateIntroNotebook [
self docDownloadFor: 'intro'
]