diff --git a/src/Dataviz/DataSelfie.class.st b/src/Dataviz/DataSelfie.class.st
new file mode 100644
index 0000000..47f4639
--- /dev/null
+++ b/src/Dataviz/DataSelfie.class.st
@@ -0,0 +1,16 @@
+"
+I represent a data portrait taken from a Twitter account.
+My helper classes are profileOverview and historyGraph.
+For mockups of how I would look, see the online repo at:
+
+http://mutabit.com/repos.fossil/data-selfies
+"
+Class {
+	#name : #DataSelfie,
+	#superclass : #Object,
+	#instVars : [
+		'profileOverview',
+		'historyGraph'
+	],
+	#category : #'Dataviz-Twitter'
+}
diff --git a/src/Dataviz/DatavizDocs.class.st b/src/Dataviz/DatavizDocs.class.st
new file mode 100644
index 0000000..5ba19e4
--- /dev/null
+++ b/src/Dataviz/DatavizDocs.class.st
@@ -0,0 +1,27 @@
+"
+I define the documentation for the Dataviz package.
+"
+Class {
+	#name : #DatavizDocs,
+	#superclass : #GrafoscopioDocumentation,
+	#category : #Dataviz
+}
+
+{ #category : #initialization }
+DatavizDocs >> initialize [
+	"I set my name, remote repository and local folder, and register the package notebook as my first document."
+	super initialize.
+
+	name := 'dataviz'.
+	repository := (FossilRepo new remote: 'http://mutabit.com/repos.fossil/grafoscopio').
+	localPlace := FileLocator workingDirectory asFileReference /'Grafoscopio'.
+	self documents
+		add: 'Packages/Dataviz/dataviz.ston'.
+]
+
+{ #category : #operation }
+DatavizDocs >> openIntroNotebook [
+	"I'm just an alias to ease the operation. I need to know which is the index of the notebook
+	I want to open, as defined in the initialize method"
+	self openNotebookAt: 1.
+]
diff --git a/src/Dataviz/ManifestDataviz.class.st b/src/Dataviz/ManifestDataviz.class.st
index 1c148a6..6d2c94f 100644
--- a/src/Dataviz/ManifestDataviz.class.st
+++ b/src/Dataviz/ManifestDataviz.class.st
@@ -7,12 +7,37 @@ Class {
 	#category : #Dataviz
 }
 
+{ #category : #'code-critics' }
+ManifestDataviz class >> ruleRBCollectionMessagesToExternalObjectRuleV1FalsePositive [
+	^ #(#(#(#RGMethodDefinition #(#'DatavizUtils class' #defineDocumentation #true)) #'2017-07-04T14:43:28.224565-05:00') )
+]
+
+{ #category : #'code-critics' }
+ManifestDataviz class >> ruleRBCollectionProtocolRuleV1FalsePositive [
+	^ #(#(#(#RGMethodDefinition #(#'PanamaPapers class' #colorfulWorldMap #true)) #'2016-04-27T15:26:43.983557-05:00') #(#(#RGMethodDefinition #(#'PanamaPapers class' #choroplethWorldMap #true)) #'2016-04-28T14:44:05.895182-05:00') )
+]
+
+{ #category : #'code-critics' }
+ManifestDataviz class >> ruleRBPrecedenceRuleV1FalsePositive [
+	^ #(#(#(#RGMethodDefinition #(#TRArcShape #surroundedBy:radialGap:angularGap:renderedIn: #false)) #'2016-07-30T15:45:56.472232-05:00') )
+]
+
 { #category : #'code-critics' }
 ManifestDataviz class >> ruleRBSentNotImplementedRuleV1FalsePositive [
 	^ #(#(#(#RGClassDefinition #(#PublishedMedInfo)) #'2015-12-06T12:21:46.001583-05:00') )
 ]
 
+{ #category : #'code-critics' }
+ManifestDataviz class >> ruleRBStringConcatenationRuleV1FalsePositive [
+	^ #(#(#(#RGMethodDefinition #(#'PanamaPapers class' #unmappedTerritories #true)) #'2016-04-26T14:51:29.568883-05:00') )
+]
+
 { #category : #'code-critics' }
 ManifestDataviz class >> ruleRBUnclassifiedMethodsRuleV1FalsePositive [
 	^ #(#(#(#RGClassDefinition #(#MedAgency)) #'2015-12-17T23:52:38.455558-05:00') )
 ]
+
+{ #category : #'code-critics' }
+ManifestDataviz class >> ruleRTInvocationSequenceRuleV1FalsePositive [
+	^ #(#(#(#RGMethodDefinition #(#TwitterProfileOverview #renderIn: #false)) #'2016-06-21T12:50:53.810131-05:00') )
+]
diff --git a/src/Dataviz/OffshoreLeaksDB.class.st
b/src/Dataviz/OffshoreLeaksDB.class.st
new file mode 100644
index 0000000..6b8f152
--- /dev/null
+++ b/src/Dataviz/OffshoreLeaksDB.class.st
@@ -0,0 +1,614 @@
+"
+I'm used to model, query and visualize the information released by the
+International Consortium of Investigative Journalists (ICIJ).
+The information downloaded and used here is based on the original
+information available at
+
+https://offshoreleaks.icij.org/about/download
+
+
+"
+Class {
+	#name : #OffshoreLeaksDB,
+	#superclass : #Object,
+	#classInstVars : [
+		'dataLocation',
+		'database'
+	],
+	#category : #'Dataviz-PanamaPapers'
+}
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> addColorConventionsTo: aView [
+	"I build in aView a legend box that explains the color conventions, and answer aView"
+
+	| labelsBox |
+
+	labelsBox := self colorCoventions.
+	labelsBox view: aView.
+	labelsBox textSize: 20.
+	labelsBox build.
+	labelsBox legendElement translateTo: 355@885.
+	^ aView.
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> choroplethWorldMap [
+	"I draw a choropleth world map where the intensity of color is given
+	according to the amount of registered offshores in each territory"
+	^ self choroplethWorldMapFor: self mappedTerritories
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> choroplethWorldMapFor: territories [
+	"I draw a choropleth world map where the intensity of color is given
+	according to the amount of registered offshores in each territory"
+	| view palette |
+	view := RTView new.
+	view @ RTDraggableView @ RTZoomableView.
+	palette := self colorPalette.
+	territories do: [ :territory |
+		| elem |
+		elem := (RTSVGPath new
+			path: (territory map);
+			fillColor: (self colorFor: territory totalOffshores in: palette);
+			borderColor: Color black;
+			scale: 1)
+			element model: (
+				territory name, String cr,
+				"territory totalOffshores asString"
+				(self totalOffshoresAsStringFor: territory)).
+		elem @ RTPopup.
+		view add: elem.
+	].
+	^ view
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> choroplethWorldMapFull [
+	"I draw the choropleth world map together with the legend of its color conventions"
+
+	^ self addColorConventionsTo: self choroplethWorldMap.
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> choroplethWorldMapQuick [
+	"I draw the choropleth world map, with its legend, from the territories data view stored in
+	the working directory. I ask for that data view to be downloaded first when it is missing"
+	| dataView |
+	dataView := FileSystem disk workingDirectory / 'territories.ston'.
+	dataView exists ifFalse: [ self downloadTerritoriesDataView ].
+	^ self addColorConventionsTo: (self choroplethWorldMapFor: self importTerritoriesData)
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> colorCoventions [
+	"I answer a legend builder that pairs each color of my palette with the range it stands for"
+
+	| title labelsBox labels palette |
+	title := 'Offshores amount by color'.
+	labels := #('Country not found in database'
+		'Between 1 and 9'
+		'Between 10 and 99'
+		'Between 100 and 999'
+		'Between 1000 and 9999'
+		'Between 10.000 and 99.999').
+	palette := self colorPalette.
+	labelsBox := RTLegendBuilder new.
+	labelsBox addText: title.
+	labels doWithIndex: [ :label :index |
+		labelsBox addColor: (palette at: index) text: label ].
+	^ labelsBox
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> colorFor: anIntegerOrNil in: aColorPalette [
+	"I answer the color of aColorPalette for the magnitude of anIntegerOrNil: slot 1 for nil or zero,
+	then one slot per power of ten up to 99999. Any other value answers black"
+	(anIntegerOrNil isNil or: [ anIntegerOrNil = 0 ]) ifTrue: [ ^ aColorPalette at: 1 ].
+	(anIntegerOrNil between: 1 and: 9) ifTrue: [ ^ aColorPalette at: 2 ].
+	(anIntegerOrNil between: 10 and: 99) ifTrue: [ ^ aColorPalette at: 3 ].
+	(anIntegerOrNil between: 100 and: 999) ifTrue: [ ^ aColorPalette at: 4 ].
+	(anIntegerOrNil between: 1000 and: 9999) ifTrue: [ ^ aColorPalette at: 5 ].
+	(anIntegerOrNil between: 10000 and: 99999) ifTrue: [ ^ aColorPalette at: 6 ].
+	^ Color black.
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> colorPalette [
+	"I answer the six colors used by the maps: a light gray followed by five tones of the 'Oranges' scheme"
+	| startPalette endPalette |
+	startPalette := RTColorPalette sequential colors: 9 scheme:'Oranges'.
+	endPalette := Array new: 6.
+	endPalette
+		at: 1 put: Color veryVeryLightGray;
+		at: 2 put: (startPalette at: 2);
+		at: 3 put: (startPalette at: 3);
+		at: 4 put: (startPalette at: 5);
+		at: 5 put: (startPalette at: 7);
+		at: 6 put: (startPalette at: 9).
+	^ endPalette.
+]
+
+{ #category : #'data visualization' }
+OffshoreLeaksDB class >> colorfulWorldMap [
+	"I'm just a test to see if the world map is working"
+	| view |
+	view := RTView new.
+	self mappedTerritories do: [ :territory |
+		| elem |
+		elem := (RTSVGPath new
+			path: (territory map);
+			fillColor: Color random;
+			borderColor: Color black;
+			scale: 1) element model: (territory name).
+		elem @ RTPopup.
+		view add: elem.
+	].
+	^ view
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> countriesWithOffshores [
+	"I provide a list of the countries with offshores, where the names of the countries
+	have been processed with my name replacements"
+
+
+	^ self totalOffshoresByCountry keys
+
+]
+
+{ #category : #'data cleaning' }
+OffshoreLeaksDB class >> countryNameReplacements [
+	"I answer a dictionary from a name, as found in the database or in the world map,
+	to the name used instead of it. I am applied to both sources so that their names can match."
+	| tmp1 |
+	tmp1 := Dictionary new.
+	tmp1
+		add: 'Antigua and Barb.' -> 'Antigua and Barbuda';
+		add: 'Bahamas' -> 'Bahamas, The';
+		add: 'Bosnia and Herz.' -> 'Bosnia and Herzegovina';
+		add: 'Bolivia, Plurinational State of' -> 'Bolivia';
+		add: 'Brunei Darussalam' -> 'Brunei';
+		add: 'Cayman Is.' -> 'Cayman Islands';
+		add: 'Congo, the Democratic Republic of the' -> 'Dem. Rep. Congo';
+		add: 'Czech Rep.' -> 'Czech Republic';
+		add: 'Dominican Rep.' -> 'Dominican Republic';
+		add: 'Korea, Democratic People''s Republic of' -> 'North Korea';
+		add: 'Korea, Republic of' -> 'South Korea';
+		add: 'Fr. Polynesia' -> 'French Polynesia';
+		add: 'Iran, Islamic Republic of' -> 'Iran';
+		add: 'Lao People''s Democratic Republic' -> 'Laos';
+		add: 'Macedonia, the Former Yugoslav Republic of' -> 'Macedonia';
+		add: 'Moldova, Republic of' -> 'Moldova';
+		add: 'Netherlands' -> 'The Netherlands';
+		add: 'Palestine, State of' -> 'Palestine';
+		add: 'Russian Federation' -> 'Russia';
+		add: 'Syrian Arab Republic' -> 'Syria';
+		add: 'Sint Maarten (Dutch part)' -> 'Saint Martin';
+		add: 'St. Kitts and Nevis' -> 'Saint Kitts and Nevis';
+		add: 'St. Vin. and Gren.' -> 'Saint Vincent and the Grenadines';
+		add: 'Tanzania, United Republic of' -> 'Tanzania';
+		add: 'Taiwan, Province of China' -> 'Taiwan';
+		add: 'Turks and Caicos Is.' -> 'Turks and Caicos Islands';
+		add: 'Viet Nam' -> 'Vietnam';
+		add: 'Venezuela, Bolivarian Republic of' -> 'Venezuela'.
+	^ tmp1 yourself
+]
+
+{ #category : #accessing }
+OffshoreLeaksDB class >> dataLocation [
+	(FileLocator documents / 'Grafoscopio') ensureCreateDirectory.
+	(FileLocator documents / 'Grafoscopio' / 'Projects') ensureCreateDirectory.
+	(FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers') ensureCreateDirectory.
+	(FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers' / 'Data' ) ensureCreateDirectory.
+	dataLocation := FileLocator documents / 'Grafoscopio' / 'Projects' / 'PanamaPapers' / 'Data' / 'offshore-leaks.sqlite'.
+	^ dataLocation
+]
+
+{ #category : #accessing }
+OffshoreLeaksDB class >> dataLocation: anObject [
+	dataLocation := anObject
+]
+
+{ #category : #accessing }
+OffshoreLeaksDB class >> database [
+	"I answer a new connection to the SQLite file on every call, so senders must keep
+	the connection they open in order to close that same one."
+	database := UDBCSQLite3Connection on: self dataLocation fullName.
+	^ database
+]
+
+{ #category : #accessing }
+OffshoreLeaksDB class >> database: anObject [
+	database := anObject
+]
+
+{ #category : #metadata }
+OffshoreLeaksDB class >> databaseMetaData [
+	"I define some metadata associated to the database download."
+
+	| metadata |
+
+	metadata := Dictionary new
+		at: 'type' put: 'Database';
+		at: 'shorcut' put: 'database';
+		at: 'website' put: 'https://datahub.io/dataset/panama-papers';
+		at: 'sha1' put: 'ebb8290bbaca3b32d98e1a15926c93c3a468e7eb';
+		at: 'downloadUrl' put: 'https://datahub.io/dataset/06f27df3-ec88-47ea-b428-7ec138f7835e/resource/50a9bda8-e44a-4aac-b265-d07fabde5612/download/offshore-leaks.sqlite.zip';
+		at: 'size' put: 54488249;
+		yourself.
+	^ metadata
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> databaseMetrics [
+	"I return a dictionary from table name to its size in rows, for all tables in the database"
+
+	| tables connection answer rows |
+	self dataLocation exists ifFalse: [ self updateDatabase. ^ self ].
+	tables := self tablesNames.
+	answer := Dictionary new.
+	"One connection is kept for all the queries, so that the one that was opened gets closed"
+	connection := self database.
+	connection open.
+	[ tables do: [ :tableName |
+		rows := (connection execute: 'SELECT Count(*) AS size FROM ', tableName) rows.
+		answer at: tableName put: (rows first data at: 'size') ] ]
+		ensure: [ connection close ].
+	^ answer
+]
+
+{ #category : #initialization }
+OffshoreLeaksDB class >> defineDocumentation [
+	"I model the important documents for this project."
+	| gfcDocumentation |
+	gfcDocumentation := GrafoscopioDocumentation new.
+	gfcDocumentation
+		name: 'offshoreLeaks';
+		repository: (FossilRepo new remote: 'http://mutabit.com/repos.fossil/panama-papers');
+		localPlace: FileLocator workingDirectory asFileReference
+			/'Grafoscopio'/'Packages'/'Dataviz'/ 'OffshoreLeaks'.
+	gfcDocumentation documents
+		add: 'panama-papers.ston';
+		add: 'territories.ston'.
+	gfcDocumentation localPlace.
+	^ gfcDocumentation
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> docDownloadFor: aDocumentType [
+	"I download the interactive documentation in STON format, according to the document
+	type. Only 'intro' is known at the moment; any other type signals an error.
+	If the documentation is already present in the system I make a temporary backup and
+	download a new copy"
+
+	| docInfo rootFolder localDoc temporalBackup remoteDoc |
+
+	(aDocumentType = 'intro') ifTrue: [ docInfo := self introMetaData ].
+	docInfo ifNil: [ ^ self error: 'Unknown document type: ', aDocumentType printString ].
+	rootFolder := (self dataLocation parent parent).
+	localDoc := rootFolder fullName, '/', (docInfo at: 'relativePath'), (docInfo at: 'filename').
+	temporalBackup := rootFolder fullName, '/', (docInfo at: 'relativePath'), aDocumentType, '.temp.ston'.
+	remoteDoc :=
+		(docInfo at: 'remoteRepo'), 'doc/tip/',
+		(docInfo at: 'relativePath'), (docInfo at: 'filename').
+	localDoc asFileReference exists
+		ifTrue: [
+			temporalBackup asFileReference exists ifTrue: [ temporalBackup asFileReference delete].
+			localDoc asFileReference renameTo: aDocumentType, '.temp.ston'
+		].
+	GrafoscopioBrowser
+		downloadingFrom: remoteDoc
+		withMessage: 'Updating: ', aDocumentType,'...'
+		into: (rootFolder fullName, '/', (docInfo at: 'relativePath')).
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> downloadDatabase [
+	"I download the data of the panama papers from its page at the DataHub community repository:
+	https://datahub.io/dataset/panama-papers
+	The download runs in a forked process, so I return before it has finished."
+	self downloadDatabaseThen: [  ]
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> downloadDatabaseThen: aBlock [
+	"I download the zipped database in a forked process, showing a progress bar, and I
+	evaluate aBlock in that same process once the download has finished.
+	Whatever needs the downloaded file must go in aBlock, because I return immediately."
+	| zipFile expectedSize advancement |
+	zipFile := self dataLocation parent / 'offshore-leaks.sqlite.zip'.
+	zipFile ensureDelete.
+	expectedSize := self databaseMetaData at: 'size'.
+	advancement := 0.
+	[
+		[ :bar |
+			bar title: 'Downloading database...'.
+			[
+				ZnClient new
+					url: (self databaseMetaData at: 'downloadUrl');
+					signalProgress: true;
+					downloadTo: self dataLocation parent
+			]
+			on: HTTPProgress
+			do: [ :progress |
+				"Progress is measured on the file being written in the download folder"
+				(zipFile exists and: [ zipFile size > 0 ])
+					ifTrue: [ advancement := (zipFile size / expectedSize) * 100 ].
+				bar current: advancement.
+				progress resume ] ] asJob run.
+		aBlock value ] fork.
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> downloadTerritoriesDataView [
+	"I download the data view of the Panama Papers territories for quick visualization
+	"
+	| downloadUrl dataView |
+	dataView := FileSystem disk workingDirectory / 'territories.ston'.
+	dataView exists
+		ifTrue: [ self inform:
+			'Data view already downloaded in expected location. Delete it first from ', String cr,
+			dataView fullName, String cr,
+			' before download it again' ]
+		ifFalse: [
+			downloadUrl := 'http://mutabit.com/repos.fossil/panama-papers/doc/tip/territories.ston'.
+			GrafoscopioDockingBar
+				downloadingFrom: downloadUrl
+				withMessage: 'Downloading data view...'
+				into: FileSystem disk workingDirectory
+		].
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> downloadWorldMap [
+	"I download the World Map to be used.
+	Original Map is courtesy of Pareto Software, LLC DBA Simplemaps.com,
+	released under a MIT license.
+	NOTE(review): the download URL below is empty, and #mappedTerritoriesRaw reads the map from
+	'dataLocation parent parent / Maps' while I check 'dataLocation parent / Maps'. Confirm both."
+	(self dataLocation parent / 'Maps' / 'world.svg') asFileReference exists
+		ifTrue: [ self inform:
+			'Worldmap already downloaded in expected location. Delete it first from ', String cr,
+			self dataLocation parent fullName, String cr,
+			' before download it again' ]
+		ifFalse: [
+			self downloadingFrom: ''
+				withMessage: 'Downloading worldmap'
+				into: (self dataLocation parent / 'Maps')]
+]
+
+{ #category : #'data export' }
+OffshoreLeaksDB class >> exportTerritoriesData [
+	"I export the territories data to a file in STON format.
+	Useful for quick visualizations and data exchanges without downloading the full database"
+
+	| storage |
+	storage := (FileSystem disk workingDirectory / 'territories.ston') ensureCreateFile.
+	storage writeStreamDo: [:stream |
+		STON put: (self mappedTerritories) onStreamPretty: stream ].
+	self inform: 'Territories data exported as ', (storage fullName).
+]
+
+{ #category : #'data import' }
+OffshoreLeaksDB class >> importTerritoriesData [
+	| dataFile |
+	dataFile := FileSystem disk workingDirectory / 'territories.ston'.
+	dataFile exists
+		ifFalse: [self inform: 'File with territories data not found' ]
+		ifTrue: [ ^ STON fromString: dataFile contents ]
+]
+
+{ #category : #initialization }
+OffshoreLeaksDB class >> initialize [
+	self defineDocumentation
+]
+
+{ #category : #metadata }
+OffshoreLeaksDB class >> introMetaData [
+	"I define some metadata associated to the introductory document."
+
+	| metadata |
+
+	metadata := Dictionary
+		with: 'type' -> 'Grafoscopio document'
+		with: 'shorcut' -> 'intro'
+		with: 'remoteRepo' -> 'http://mutabit.com/repos.fossil/panama-papers/'
+		with: 'relativePath' -> ''
+		with: 'filename' -> 'panama-papers.ston'.
+	^ metadata
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> mappedTerritories [
+	"I reduce the mismatch between names of territories mentioned in the Panama Papers
+	database and the ones in the worldmap"
+	| countries offshoresData replacements |
+	countries := self mappedTerritoriesRaw.
+	offshoresData := self totalOffshoresByCountry.
+	replacements := self countryNameReplacements.
+	countries do: [:c |
+		(replacements includesKey: c name)
+			ifTrue: [c name: (replacements at: c name)].
+		(offshoresData includesKey: c name)
+			ifTrue: [c totalOffshores: (offshoresData at: c name)]
+	].
+	^ countries
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> mappedTerritoriesRaw [
+	"I return a list of all mappable territories, no matter if they have been mentioned
+	in the Panama Papers or not."
+
+	| xmlStream xmlDoc map mappedTerritoriesRaw |
+	xmlStream := (self dataLocation parent parent / 'Maps' / 'world.svg') asFileReference contents.
+	xmlDoc := XMLDOMParser parse: xmlStream.
+	map := xmlDoc allElementsNamed: 'path'.
+	mappedTerritoriesRaw := OrderedCollection new.
+	map contentNodesDo: [ :n |
+		mappedTerritoriesRaw add: (
+			Territory new
+				iso: (n attributeAt: 'id');
+				name: (n attributeAt: 'data-name');
+				map: (n attributeAt: 'd')).
+	].
+	^ mappedTerritoriesRaw
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> oldQueries [
+	"I store a dictionary of the 'old queries' that were used to query
+	the first version of the offshores database stored at:
+
+	https://datahub.io/dataset/panama-papers
+
+	The dictionary contains as keys the name of the method where the query
+	was used and as value the respective query.
+	To see the current implementation, browse the key.
+	"
+	| queries |
+	queries := Dictionary new.
+	queries
+		at: 'totalOffshoresByCountryRaw'
+		put: 'SELECT country_name, COUNT(*) AS "total_offshores" FROM
+			(SELECT country_name, Description_
+			FROM nodesNW
+			INNER JOIN node_countriesNW
+			ON nodesNW.Unique_ID = node_countriesNW.NODEID1
+			ORDER BY country_name)
+			GROUP BY country_name'.
+	^queries.
+]
+
+{ #category : #documents }
+OffshoreLeaksDB class >> openIntroNotebook [
+	| docs |
+	docs := self defineDocumentation.
+	docs openNotebookAt: 1.
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> tablesNames [
+	"I return the names of the tables of the SQLite database"
+	| connection query |
+	query := 'SELECT name
+		FROM sqlite_master
+		WHERE type="table"
+		ORDER BY name'.
+	self dataLocation exists ifFalse: [ self updateDatabase. ^ self ].
+	"The opened connection is kept, so that this same one gets closed"
+	connection := self database.
+	connection open.
+	^ [ (connection execute: query) rows collect: [ :each | each data at: 'name' ] ]
+		ensure: [ connection close ]
+]
+
+{ #category : #'data cleaning' }
+OffshoreLeaksDB class >> totalOffshoresAsStringFor: aTerritory [
+	"I return '0' if the total offshores for aTerritory is nil, or its value as a string otherwise"
+
+	aTerritory totalOffshores isNil
+		ifTrue: [ ^ 0 asString ]
+		ifFalse: [ ^ aTerritory totalOffshores asString ]
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> totalOffshoresByCountry [
+	"I return a dictionary from country name, after my name replacements, to its total offshores"
+	| replacements results |
+	replacements := self countryNameReplacements.
+	results := Dictionary new.
+	self totalOffshoresByCountryRaw do: [ :entry | | rawName |
+		rawName := entry at: 'country_name'.
+		results
+			at: (replacements at: rawName ifAbsent: [ rawName ])
+			put: (entry at: 'total_offshores') ].
+	^ results
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> totalOffshoresByCountryRaw [
+	"I query for the offshores by country data from a SQLite database file.
+	I answer an empty collection when the database file is not there yet"
+	| query connection |
+	query := 'SELECT countries AS "country_name", count(countries) AS "total_offshores"
+		FROM Addresses
+		GROUP BY countries'.
+	self dataLocation exists ifFalse: [
+		self inform: 'Download database first by running: ', String cr,
+			'"OffshoreLeaksDB updateDatabase"'.
+		^ #() ].
+	connection := self database.
+	connection open.
+	^ [ (connection execute: query) rows collect: [ :each | each data ] ]
+		ensure: [ connection close ]
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> totalOffshoresFor: aCountryName [
+	"I give the total amount of offshores companies for given country name,
+	or nil when that name has no offshores registered.
+	This country is a String, that can contain whitespaces"
+
+	^ self totalOffshoresByCountry at: aCountryName ifAbsent: [ nil ]
+]
+
+{ #category : #'data queries' }
+OffshoreLeaksDB class >> unmappedTerritories [
+	"I list all territories that are mentioned in the Panama Papers that are not part of the
+	original maps in Roassal or added into the maps of the Panama Papers class.
+	This keeps the code modular, but in the future Roassal should include more territories"
+
+	| unmapped mappedNames |
+	mappedNames := self mappedTerritories collect: [:e | e name].
+	unmapped := self countriesWithOffshores reject: [:eachCountry |
+		mappedNames includes: (eachCountry) ].
+	^ unmapped
+
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> unzipDatabase [
+	| zipFile |
+	zipFile := self dataLocation parent / 'offshore-leaks.sqlite.zip'.
+	zipFile exists
+		ifTrue: [(GrafoscopioNotebook SHA1For: zipFile is: (self databaseMetaData at: 'sha1'))
+			ifTrue: [
+				ZipArchive new
+					readFrom: zipFile;
+					extractAllTo: self dataLocation parent]
+		]
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> updateDatabase [
+	"I download a fresh copy of the database and replace the local one with it.
+	The replacement runs in the download process, after the download has finished."
+	self downloadDatabaseThen: [
+		self dataLocation asFileReference ensureDelete.
+		self unzipDatabase ]
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> updateDatabaseUI [
+	"I update the database, asking for confirmation first when there is already one.
+	Nothing is done when the user does not confirm."
+	| answer |
+	self dataLocation asFileReference exists
+		ifTrue: [
+			answer := UIManager default
+				confirm:
+					'Database already in the system.' , String cr,
+					'Do you want to delete it a download a new one?'.
+			answer
+				ifFalse: [ ^ self ].
+		].
+	self updateDatabase
+]
+
+{ #category : #updating }
+OffshoreLeaksDB class >> updateIntroNotebook [
+	self docDownloadFor: 'intro'
+]
diff --git a/src/Dataviz/PublishedMedInfo.class.st b/src/Dataviz/PublishedMedInfo.class.st
index 3aff0e4..13bab04 100644
--- a/src/Dataviz/PublishedMedInfo.class.st
+++ b/src/Dataviz/PublishedMedInfo.class.st
@@ -61,7 +61,6 @@ PublishedMedInfo >> addLegendTo: aView titled: aString withData: anArray withCol
 PublishedMedInfo >> addLineSeparatorsTo: aView withData: data columnsDistance: aDistance centerSized: internalRadio ringSized: ringSize [
 	| e1 e2 ang |
 	1 to: data children size do: [ :i |
-		Transcript show: i.
 		e1 := (RTBox new size: 1) element.
 		ang := (i * 360 / data children size) degreesToRadians.
 		e1 translateTo: (internalRadio * ang cos)@(internalRadio * ang sin).
@@ -263,7 +262,7 @@ PublishedMedInfo >> exploreMatrix: aMatrix by: type coloredWith: aColorPalette
 	].
 	b interaction noInteractions.
 	type asLowercase = 'country' ifTrue: [
-		b interaction 
+		b interaction
 			addInteraction: (RTPopup text: [:d | (self medDataKeys at: ((aMatrix size) - d level + anInteger + 1)), '-> ', d header]).
 	].
 
diff --git a/src/Dataviz/TPMessages.class.st b/src/Dataviz/TPMessages.class.st
new file mode 100644
index 0000000..4958706
--- /dev/null
+++ b/src/Dataviz/TPMessages.class.st
@@ -0,0 +1,130 @@
+"
+I represent the general stats for the kind of messages of a given profile
+account, being them: tweets, retweets and replies.
+
+I'm useful for general overviews of a twitter account in a given period of
+time or in general.
+"
+Class {
+	#name : #TPMessages,
+	#superclass : #Object,
+	#instVars : [
+		'tweets',
+		'retweets',
+		'replies'
+	],
+	#category : #'Dataviz-Twitter'
+}
+
+{ #category : #accessing }
+TPMessages >> asAssociations [
+	"I answer the associations of my dictionary of message counts"
+	^ self asDictionary associations.
+
+]
+
+{ #category : #accessing }
+TPMessages >> asDictionary [
+	| d |
+	d := Dictionary new.
+	d
+		at: 'tweets' put: self tweetsSize;
+		at: 'retweets' put: self retweetsSize;
+		at: 'replies' put: self repliesSize.
+	^ d.
+
+]
+
+{ #category : #initialization }
+TPMessages >> initialize [
+	super initialize.
+]
+
+{ #category : #'data queries' }
+TPMessages >> mentionedProfilesByFrequency [
+	"I answer the sorted associations that RTNameCloud computes for the screen names
+	mentioned in my replies. I answer myself when I have no replies."
+	| words builder |
+	self replies ifNil: [ ^ self ].
+	words := String streamContents: [ :stream |
+		self replies do: [ :reply |
+			((reply at: 'entities') at: 'user_mentions') do: [ :mention |
+				stream space; nextPutAll: (mention at: 'screen_name') ] ] ].
+	builder := RTNameCloud new.
+	builder dictionary: "RTEnglishDictionary new unnecessaryWords," self class new.
+	builder addString: words.
+	^ builder sortedAssociations
+]
+
+{ #category : #accessing }
+TPMessages >> replies [
+	^ replies
+]
+
+{ #category : #accessing }
+TPMessages >> replies: anOrderedCollection [
+	replies := anOrderedCollection
+]
+
+{ #category : #accessing }
+TPMessages >> repliesSize [
+	^ self replies ifNil: [ 0 ] ifNotNil: [ :messages | messages size ]
+]
+
+{ #category : #accessing }
+TPMessages >> retweets [
+	^ retweets
+]
+
+{ #category : #accessing }
+TPMessages >> retweets: anOrderedCollection [
+	retweets := anOrderedCollection
+]
+
+{ #category : #accessing }
+TPMessages >> retweetsSize [
+	^ self retweets ifNil: [ 0 ] ifNotNil: [ :messages | messages size ]
+]
+
+{ #category : #accessing }
+TPMessages >> tweets [
+	^ tweets
+]
+
+{ #category : #accessing }
+TPMessages >> tweets: anOrderedCollection [
+	tweets := anOrderedCollection
+]
+
+{ #category : #accessing }
+TPMessages >> tweetsSize [
+	^ self tweets ifNil: [ 0 ] ifNotNil: [ :messages | messages size ]
+]
+
+{ #category : #utility }
+TPMessages >> visualWordsFrom: aWordsCollection colored: aColor [
+	"I take a collection of words and turn them into visual words that can be put into a canvas"
+	| visualWords |
+	visualWords := OrderedCollection new.
+	aWordsCollection do: [ :each | | label |
+		label := TRRotatedLabelShape new
+			text: each asString;
+			color: aColor.
+		visualWords add: label].
+	^ visualWords
+
+
+]
+
+{ #category : #'data queries' }
+TPMessages >> wordsByFrequencyInTweets [
+	"I answer the sorted associations that RTNameCloud computes for the text of my tweets, or myself when I have none"
+	| words builder |
+	self tweets ifNil: [ ^ self ].
+	words := String streamContents: [ :stream |
+		self tweets do: [ :tweet | stream space; nextPutAll: tweet message ] ].
+	builder := RTNameCloud new.
+	builder dictionary: "RTEnglishDictionary new unnecessaryWords," self class new.
+	builder addString: words.
+	^ builder sortedAssociations
+]
diff --git a/src/Dataviz/TRArcShape.extension.st b/src/Dataviz/TRArcShape.extension.st
new file mode 100644
index 0000000..7eaa54e
--- /dev/null
+++ b/src/Dataviz/TRArcShape.extension.st
@@ -0,0 +1,21 @@
+Extension { #name : #TRArcShape }
+
+{ #category : #'*DataViz' }
+TRArcShape >> surroundedBy: anArray radialGap: aDistance angularGap: anAngle renderedIn: aCanvas [
+	"I put TR shape objects stored in anArray equally separated around the external part of an RTArc.
+	Notice that the array must contain TR shapes. A single shape is put at the starting angle"
+
+	| sep start end |
+	end := self betaAngle - anAngle.
+	start := self alphaAngle + anAngle.
+	sep := anArray size > 1 ifTrue: [ (start - end) / (anArray size - 1) ] ifFalse: [ 0 ].
+	anArray doWithIndex: [ :each :i | | angle |
+		angle := (start - (i - 1 * sep)) negated.
+		each class = TRRotatedLabelShape
+			ifTrue: [ each angleInDegree: angle + ((angle between: -270 and: -90) ifTrue: [ 180 ] ifFalse: [ 0 ]) ].
+		aCanvas addShape: (each
+			translateBy: (Point r: (self externalRadius + aDistance) theta: angle degreesToRadians))
+	].
+	aCanvas addShape: self.
+	^ aCanvas
+]
diff --git a/src/Dataviz/Territory.class.st b/src/Dataviz/Territory.class.st
new file mode 100644
index 0000000..e578d82
--- /dev/null
+++ b/src/Dataviz/Territory.class.st
@@ -0,0 +1,66 @@
+"
+I model the territories (mostly countries) mentioned in the Panama Papers.
+"
+Class {
+	#name : #Territory,
+	#superclass : #Object,
+	#instVars : [
+		'name',
+		'map',
+		'altname',
+		'totalOffshores',
+		'iso'
+	],
+	#category : #'Dataviz-PanamaPapers'
+}
+
+{ #category : #accessing }
+Territory >> altname [
+	^ altname
+]
+
+{ #category : #accessing }
+Territory >> altname: anObject [
+	altname := anObject
+]
+
+{ #category : #accessing }
+Territory >> iso [
+	"I return the two letters ISO code for a country/territory"
+	^ iso
+]
+
+{ #category : #accessing }
+Territory >> iso: anObject [
+	iso := anObject
+]
+
+{ #category : #accessing }
+Territory >> map [
+	^ map
+]
+
+{ #category : #accessing }
+Territory >> map: anObject [
+	map := anObject
+]
+
+{ #category : #accessing }
+Territory >> name [
+	^ name
+]
+
+{ #category : #accessing }
+Territory >> name: anObject [
+	name := anObject
+]
+
+{ #category : #accessing }
+Territory >> totalOffshores [
+	^ totalOffshores
+]
+
+{ #category : #accessing }
+Territory >> totalOffshores: anObject [
+	totalOffshores := anObject
+]
diff --git a/src/Dataviz/Tweet.class.st b/src/Dataviz/Tweet.class.st
index 28f5c63..9c0ad6b 100644
--- a/src/Dataviz/Tweet.class.st
+++ b/src/Dataviz/Tweet.class.st
@@ -15,7 +15,9 @@ Class {
 		'mentions',
 		'links',
 		'hashtags',
-		'type'
+		'type',
+		'retweetedUser',
+		'repliedUsers'
 	],
 	#category : #'Dataviz-Twitter'
 }
@@ -30,6 +32,50 @@ Tweet >> date: anObject [
 	date := anObject
 ]
 
+{ #category : #'data scrapping' }
+Tweet >> detectMessageTypeFrom: aJSONSnippet [
+	"Given aJSONSnippet containing a Tweet message data, I detect the type of message inside,
+	between tweet, retweet or reply, to convert it to a native Tweet object"
+	(aJSONSnippet includesKey: 'in_reply_to_status_id')
+		ifTrue: [
+			self
+				type: 'reply';
+				url: '/', (self profile, '/status/', (aJSONSnippet at: 'id_str')) ]
+		ifFalse: [(aJSONSnippet includesKey: 'retweeted_status')
+			ifTrue: [
+				self
+					type: 'retweet';
+					url: '/',
+						(((aJSONSnippet at: 'retweeted_status') at: 'user') at: 'screen_name'),
+						'/status/',
+						((aJSONSnippet at: 'retweeted_status') at: 'id_str') ]
+			ifFalse: [
+				self
+					type: 'tweet';
+					url: '/', (self profile, '/status/', (aJSONSnippet at: 'id_str')) ] ].
+]
+
+{ #category : #'data scrapping' }
+Tweet >> detectRepliedProfilesFrom: aJSONSnippet [
+	"Given aJSONSnippet containing a reply message data, I store the replied screen name, or nil when it is absent."
+	self repliedUsers: (aJSONSnippet at: 'in_reply_to_screen_name' ifAbsent: [ nil ])
+	"| initalMentions |
+	initalMentions := (aJSONSnippet at: 'entities') at: 'user_mentions'.
+	initalMentions isEmpty
+		ifFalse: [ self mentions: nil ]
+		ifTrue: [
+			) ]."
+]
+
+{ #category : #'data scrapping' }
+Tweet >> detectRetweetedProfileFrom: aJSONSnippet [
+	"Given aJSONSnippet containing a retweet message data, I detect the profile that was retweeted."
+	(aJSONSnippet includesKey: 'retweeted_status')
+		ifFalse: [ self retweetedUser: nil ]
+		ifTrue: [
+			self retweetedUser: (((aJSONSnippet at: 'retweeted_status') at: 'user') at: 'screen_name') ].
+]
+
 { #category : #accessing }
 Tweet >> hashtags [
 	^ hashtags
@@ -214,6 +260,26 @@ Tweet >> profile: anObject [
 	profile := anObject
 ]
 
+{ #category : #accessing }
+Tweet >> repliedUsers [
+	^ repliedUsers
+]
+
+{ #category : #accessing }
+Tweet >> repliedUsers: anObject [
+	repliedUsers := anObject
+]
+
+{ #category : #accessing }
+Tweet >> retweetedUser [
+	^ retweetedUser
+]
+
+{ #category : #accessing }
+Tweet >> retweetedUser: anObject [
+	retweetedUser := anObject
+]
+
 { #category : #'data scrapping' }
 Tweet >> scrapDataFromUrl: aTweetUrl [
 	"Scraps most of the data in the page of a aTweetUrl. Most of the tweets are prestored now, but in the future
@@ -328,54 +394,71 @@ Tweet >> showInView: aView sized: aSize [
 ]
 
 { #category : #'data visualization' }
-Tweet >> silenceMapFor: arg1 [
-	| tmp1 tmp2 tmp3 tmp4 tmp5 tmp6 tmp7 tmp8 tmp9 tmp11 tmp13 tmp15 tmp17 tmp19 tmp21 |
-	self scrapDataFromUrl: arg1.
-	tmp5 := (Date today - self date) days.
-	tmp7 := self impactFor: arg1.
-	tmp11 := RTBox new.
-	tmp11 color: Color red.
-	tmp3 := tmp11 size: 200.
-	tmp3 := tmp3 element.
-	tmp3 translateBy: (tmp5 * -50) @ 0.
-	tmp1 := RTView new.
-	tmp6 := self mentionsClusterSeparated: 70 inView: tmp1.
-	tmp2 := self showInView: tmp1 sized: 35.
-	tmp2 translateBy: (tmp5 * -25) @ -600.
-	tmp1 add: tmp3.
-	tmp4 := RTEdge from: tmp6 to: tmp3.
-	tmp13 := RTGradientColoredLine new.
-	tmp13
-		colors: (Array with: (Color white alpha: 0.3) with: (Color red alpha: 0.9));
-		precision: 100;
-		width: 20.
-	tmp1 add: tmp4 + tmp13 gradientColorShape.
-	tmp15 := RTLabel new.
-	tmp15 text: tmp5 asString , ' días sin respuesta'.
-	tmp8 := (tmp15 height: tmp5 * 2.5) element @ RTDraggable.
-	tmp8 translateBy: (tmp5 * -25) @ -105.
-	tmp17 := RTLabel new.
-	tmp17
-		text: 'al ' , Date today asString , ' y contando...';
-		height: tmp5 * 1.
-	tmp9 := (tmp17 color: Color gray) element @ RTDraggable.
-	tmp9 translateBy: (tmp5 * -15) @ 50.
-	tmp1
-		add: tmp8;
-		add: tmp9.
-	tmp19 := RTLabelled new.
-	tmp19
-		text: (tmp7 at: 'retweeters') asString , ' retweets';
-		fontSize: tmp5 * 2.
-	tmp3 @ (tmp19 color: Color gray).
-	tmp21 := RTLabelled new.
-	tmp21
-		text: (tmp7 at: 'reach') asString , ' lectores' , String cr , '(max)';
-		fontSize: tmp5 * 2;
-		color: Color gray.
-	tmp3 @ tmp21 below.
-	tmp1 view canvas focusOnCenterScaled.
-	^ tmp1 @ RTDraggableView
+Tweet >> silenceMapFor: aTweetUrl [
+
+	"Creates a visualization of how long a tweet has not been answered (any kind of answer: not favs,
+	not RT, no nothing)"
+
+	| v tweet impact line timeUnanswered mentionedAvatars dummyData lineLabelUp lineLabelDown |
+
+	self scrapDataFromUrl: aTweetUrl.
+	timeUnanswered := (Date today - self date) days.
+
+	dummyData := self impactFor: aTweetUrl.
+
+	"Impact box"
+	impact := RTBox new
+		color: (Color red);
+		size: 200.
+
+	impact := impact element .
+	impact translateBy: (timeUnanswered*(-50))@0.
+
+	"Adding objects to the view, except avatars"
+	v := RTView new.
+	mentionedAvatars := self mentionsClusterSeparated: 70 inView: v.
+	tweet := self showInView: v sized: 35.
+	tweet translateBy: (timeUnanswered*(-25))@(-600).
+	v add: impact.
+	"Line"
+	line := RTEdge from: mentionedAvatars to: impact.
+	v add: (line + (RTGradientColoredLine new
+		colors: (Array with: (Color white alpha:0.3) with: (Color red alpha:0.9));
+		precision: 100;
+		width: 20;
+		gradientColorShape)).
+
+	"Adding line labels"
+	lineLabelUp := (RTLabel new
+		text: timeUnanswered asString, ' días sin respuesta';
+		height: timeUnanswered * 2.5) element @ RTDraggable.
+	lineLabelUp translateBy: (timeUnanswered*(-25))@(-105).
+
+	lineLabelDown := (RTLabel new
+		text: 'al ', Date today asString, ' y contando...';
+		height: timeUnanswered * 1;
+		color: Color gray) element @ RTDraggable.
+	lineLabelDown translateBy: (timeUnanswered*(-15))@(50).
+
+	v add: lineLabelUp; add: lineLabelDown.
+
+
+	impact @ (RTLabelled new
+		text: (((dummyData at: 'retweeters') asString), ' retweets');
+		fontSize: timeUnanswered * 2;
+		color: Color gray).
+
+	impact @ (RTLabelled new
+		text: (((dummyData at: 'reach') asString), ' lectores', String cr, '(max)');
+		fontSize: timeUnanswered * 2;
+		color: Color gray;
+		below).
+
+	"Showing the canvas"
+
+	v view canvas focusOnCenterScaled.
+	^ v @ RTDraggableView.
+ ] { #category : #'as yet unclassified' } diff --git a/src/Dataviz/TwitterInteractionProfile.class.st b/src/Dataviz/TwitterInteractionProfile.class.st new file mode 100644 index 0000000..4679a61 --- /dev/null +++ b/src/Dataviz/TwitterInteractionProfile.class.st @@ -0,0 +1,56 @@ +" +I represent the interactions of a particular twitter profile with other +profiles. +I'm used for TwitterProfileOverview visualizations. +" +Class { + #name : #TwitterInteractionProfile, + #superclass : #Object, + #instVars : [ + 'screenName', + 'avatar', + 'tweets', + 'retweets' + ], + #category : #'Dataviz-Twitter' +} + +{ #category : #accessing } +TwitterInteractionProfile >> avatar [ + ^ avatar +] + +{ #category : #accessing } +TwitterInteractionProfile >> avatar: anObject [ + avatar := anObject +] + +{ #category : #accessing } +TwitterInteractionProfile >> retweets [ + ^ retweets +] + +{ #category : #accessing } +TwitterInteractionProfile >> retweets: anObject [ + retweets := anObject +] + +{ #category : #accessing } +TwitterInteractionProfile >> screenName [ + ^ screenName +] + +{ #category : #accessing } +TwitterInteractionProfile >> screenName: anObject [ + screenName := anObject +] + +{ #category : #accessing } +TwitterInteractionProfile >> tweets [ + ^ tweets +] + +{ #category : #accessing } +TwitterInteractionProfile >> tweets: anObject [ + tweets := anObject +] diff --git a/src/Dataviz/TweetsCollection.class.st b/src/Dataviz/TwitterMessages.class.st similarity index 62% rename from src/Dataviz/TweetsCollection.class.st rename to src/Dataviz/TwitterMessages.class.st index 07373e6..5adec68 100644 --- a/src/Dataviz/TweetsCollection.class.st +++ b/src/Dataviz/TwitterMessages.class.st @@ -24,24 +24,25 @@ Internal Representation and Key Implementation Points. 
Implementation Points " Class { - #name : #TweetsCollection, + #name : #TwitterMessages, #superclass : #Object, #instVars : [ - 'tweets' + 'messages' ], #category : #'Dataviz-Twitter' } { #category : #'data visualization' } -TweetsCollection >> activityHistogramFor: aProfileName in: aDataBaseFile [ - "I draw a histogram of the tweeter activity for a given profile name with data stored in aDataBaseFile. - The database stores the individual tweets for this profile, with their type (tweet, retweet or reply), - unique url and date. +TwitterMessages >> activityHistogramFor: aProfileName in: aDataBaseFile [ + "I draw a histogram of the tweeter activity for a given profile name with data stored in + aDataBaseFile. + The database stores the individual tweets for this profile, with their type (tweet, retweet + or reply), unique url and date. A proper schema of the data base still needs to be published. Is the one used in all references to aDataBaseFile." | sample activityDataArray monthOfFirstTweet activityDataCollection histogramData plot | - sample := TweetsCollection new. + sample := TwitterMessages new. activityDataArray := sample monthlyActivityDataFor: aProfileName in: aDataBaseFile. (activityDataArray size > 0) ifFalse: [ @@ -83,15 +84,20 @@ TweetsCollection >> activityHistogramFor: aProfileName in: aDataBaseFile [ ] ] -{ #category : #'data queries' } -TweetsCollection >> importTweetsFromJSONFile: aJSONFile [ +{ #category : #'as yet unclassified' } +TwitterMessages >> ifEmpty: aBlockClosure [ + "I evaluate aBlockClosure and answer its value when I hold no messages; otherwise I answer myself." ^ self messages ifEmpty: aBlockClosure ifNotEmpty: [ self ]. +] + +{ #category : #'data scrapping' } +TwitterMessages >> importFromJSONLocalFile: aFilePath [ "I import all the tweets for aJSONFile and convert them in tweets inside a TweetCollection" | stream truncated jsonData currentTweet | - stream := aJSONFile readStream. - "We need to truncate the original file to quite the first line, which is the name of the exported array, so NeoJSONReader doesn't complain" + stream := aFilePath readStream. 
+ "We need to truncate the original file to quite the first line, which is the name of the + exported array, so NeoJSONReader doesn't complain" truncated := WriteStream on: String new. stream contents lines allButFirstDo: [ :each | truncated nextPutAll: each ]. - jsonData := NeoJSONReader fromString: truncated contents asString. jsonData do: [:each | currentTweet := Tweet new. @@ -99,42 +105,42 @@ TweetsCollection >> importTweetsFromJSONFile: aJSONFile [ message: (each at: 'text'); profile: ((each at: 'user') at: 'screen_name'); date: ((each at: 'created_at') copyFrom: 1 to: 19) asDateAndTime. - "Detecting the kind of message and processing accordingly" - (each keys includes: 'in_reply_to_status_id') - ifTrue: [ - currentTweet - type: 'reply'; - url: '/', (currentTweet profile, '/status/', (each at: 'id_str'))] - ifFalse: [(each keys includes: 'retweeted_status') - ifTrue: [ - currentTweet - type: 'retweet'; - url: '/', - (((each at: 'retweeted_status') at: 'user') at: 'screen_name'), - '/status/', - ((each at: 'retweeted_status') at: 'id_str'). - ] - ifFalse: [ - currentTweet - type: 'tweet'; - url: '/', (currentTweet profile, '/status/', (each at: 'id_str')) ] - ]. + currentTweet detectMessageTypeFrom: each. + currentTweet detectRetweetedProfileFrom: each. + currentTweet detectRepliedProfilesFrom: each. "Detecting hashtags" "(((each at: 'entities') at: 'hashtags') size > 0) ifTrue: [ (each at: 'entities') at: 'hashtags' ]." - self tweets add: currentTweet. + self messages add: currentTweet. ]. ] +{ #category : #'data scrapping' } +TwitterMessages >> importFromJSONRemoteFile: aFileUrl [ + "I import all the tweets for aFileUrl and convert them in tweets inside a TweetCollection" + | tweetsFile | + tweetsFile := FileLocator temp asFileReference / (aFileUrl splitOn: '/') last. + tweetsFile exists ifTrue: [ tweetsFile delete ]. + ZnClient new + get: aFileUrl; + downloadTo: FileLocator temp asFileReference. + self importFromJSONLocalFile: tweetsFile. 
+] + +{ #category : #testing } +TwitterMessages >> isEmpty [ + "I answer true when I hold no messages, false otherwise." ^ self messages isEmpty. +] + { #category : #'data queries' } -TweetsCollection >> loadTweetsFor: aProfileName from: aDataBaseFile [ +TwitterMessages >> loadTweetsFor: aProfileName from: aDataBaseFile [ "I select all the tweets for aProfileName in a given database" | db queryResults temporalTweet | "openning connection" - db := NBSQLite3Connection on: aDataBaseFile. + db := UDBCSQLite3Connection on: aDataBaseFile. db open. "Querying the data base" queryResults := (db execute: 'select * from tweets where profile="',aProfileName,'";') rows. @@ -144,7 +150,7 @@ TweetsCollection >> loadTweetsFor: aProfileName from: aDataBaseFile [ temporalTweet := Tweet new. temporalTweet url: (each at: 'url'); - date: (TimeStamp fromUnixTime: (each at: 'date')) asUTC; + date: (TimeStampMethodConverter fromUnixTime: (each at: 'date')) asUTC; type: (each at: 'type'); message: (each at: 'message'); profile: (each at: 'profile'). @@ -153,14 +159,36 @@ TweetsCollection >> loadTweetsFor: aProfileName from: aDataBaseFile [ ] { #category : #'data queries' } -TweetsCollection >> monthlyActivityDataFor: aProfileName in: aDataBaseFile [ +TwitterMessages >> mentionedProfilesByFrequencyUpTo: aPercentage [ + "I answer the leading part of the replied profiles sorted by frequency: only the first + aPercentage percent of the entries (rounded down) are kept." + | totalSize copiedSize queryResults | + queryResults := self repliedProfilesByFrequency. + totalSize := queryResults size. + copiedSize := (totalSize * aPercentage / 100) floor. + ^ queryResults copyFrom: 1 to: copiedSize. 
+ +] + +{ #category : #accessing } +TwitterMessages >> messages [ + ^ messages ifNil: [messages := OrderedCollection new] +] + +{ #category : #accessing } +TwitterMessages >> messages: anOrderedCollection [ + messages := anOrderedCollection +] + +{ #category : #'data queries' } +TwitterMessages >> monthlyActivityDataFor: aProfileName in: aDataBaseFile [ "I present a histogram of the tweets that differenciates tweets, retweets and replies, for a given profile in a given SQLite database (for the moment I supposse that the profile exist there and data base schema is correct)" | db queryResults firstMonth lastMonth currentMonth activityCalendar monthOfFirstTweet | "openning connection" - db := NBSQLite3Connection on: aDataBaseFile. + db := UDBCSQLite3Connection on: aDataBaseFile. db open. "Querying the data base" db execute: 'create temporary table profile_tweets as select * from tweets where profile="',aProfileName,'";'. @@ -211,11 +239,11 @@ TweetsCollection >> monthlyActivityDataFor: aProfileName in: aDataBaseFile [ ] { #category : #'data storage / persistance' } -TweetsCollection >> populateDataBase: aDataBaseFile [ +TwitterMessages >> populateDataBase: aDataBaseFile [ "I populate a SQLite database file with my tweets data" | db | "openning connection" - db := NBSQLite3Connection on: aDataBaseFile. + db := UDBCSQLite3Connection on: aDataBaseFile. db open. "Creating the data base tweets schema" db execute: @@ -239,13 +267,60 @@ TweetsCollection >> populateDataBase: aDataBaseFile [ db close. ] +{ #category : #'as yet unclassified' } +TwitterMessages >> repliedProfilesByFrequency [ + | words builder replies | + replies := (self splitByType at: 'replies') ifNil: [ ^ self ]. + words := ''. + replies do: [ :message | + words := words, ' ', (message repliedUsers) ]. + builder := RTNameCloud new. + builder dictionary: "RTEnglishDictionary new unnecessaryWords," self class new. + builder addString: words. 
+ ^ builder sortedAssociations +] + +{ #category : #'data queries' } +TwitterMessages >> repliesSize [ + ^ self sizesByType at: 'repliesSize'. +] + +{ #category : #'data queries' } +TwitterMessages >> retweetedProfilesByFrequency [ + | words builder retweets | + retweets := (self splitByType at: 'retweets') ifNil: [ ^ self ]. + words := ''. + retweets do: [ :message | + words := words, ' ', (message retweetedUser) ]. + builder := RTNameCloud new. + builder dictionary: "RTEnglishDictionary new unnecessaryWords," self class new. + builder addString: words. + ^ builder sortedAssociations +] + +{ #category : #'data queries' } +TwitterMessages >> retweetedProfilesByFrequencyUpTo: aPercentage [ + "I shown the words by frequency stripped until a percentage of the total size of such words + is reached." + | totalSize copiedSize queriedArray | + queriedArray := self retweetedProfilesByFrequency. + totalSize := queriedArray size. + copiedSize := (totalSize * aPercentage / 100) floor. + ^ queriedArray copyFrom: 1 to: copiedSize. +] + +{ #category : #'data queries' } +TwitterMessages >> retweetsSize [ + ^ self sizesByType at: 'retweetsSize'. +] + { #category : #'data visualization' } -TweetsCollection >> ringOverview [ +TwitterMessages >> ringOverview [ "I present a overview of the tweets as a ring that differenciates tweets, retweets and replies" | totalTweets replies retweets ring | replies := 0. retweets := 0. - tweets do: [ :each | + messages do: [ :each | (each type = 'reply') ifTrue: [replies := replies + 1]. (each type = 'retweets') ifTrue: [retweets := retweets + 1]]. totalTweets := (self tweets size) - replies - retweets. 
@@ -264,7 +339,7 @@ TweetsCollection >> ringOverview [ ] { #category : #'data visualization' } -TweetsCollection >> ringOverviewFor: aProfileName in: aDataBaseFile [ +TwitterMessages >> ringOverviewFor: aProfileName in: aDataBaseFile [ "I present a overview of the tweets as a ring that differenciates tweets, retweets and replies, for a given profile in a given SQLite database (for the moment I supposse that the profile exist there and data base schema is correct)" @@ -298,7 +373,7 @@ TweetsCollection >> ringOverviewFor: aProfileName in: aDataBaseFile [ ] { #category : #'data scrapping' } -TweetsCollection >> scrapTweetsFromHtmlFile: aHtmlFile [ +TwitterMessages >> scrapTweetsFromHtmlFile: aHtmlFile [ "I scraps tweets from a downloaded html file. On how to download such file for any given public twitter profile look at: http://blog.databigbang.com/scraping-web-sites-which-dynamically-load-data/ @@ -334,12 +409,69 @@ TweetsCollection >> scrapTweetsFromHtmlFile: aHtmlFile [ self tweets: tweetsTemp. ] -{ #category : #accessing } -TweetsCollection >> tweets [ - ^ tweets ifNil: [tweets := OrderedCollection new] +{ #category : #'data queries' } +TwitterMessages >> sizesByType [ + "I answer a dictionary with how many tweets, retweets and replies I hold, under the keys 'tweetsSize', 'retweetsSize' and 'repliesSize'" + | splitted | + splitted := self splitByType. + ^ Dictionary new + at: 'tweetsSize' put: (splitted at: 'tweets') size; + at: 'retweetsSize' put: (splitted at: 'retweets') size; + at: 'repliesSize' put: (splitted at: 'replies') size; + yourself ] -{ #category : #accessing } -TweetsCollection >> tweets: anOrderedCollection [ - tweets := anOrderedCollection +{ #category : #'data queries' } +TwitterMessages >> splitByType [ + "I group my messages by type: those typed 'retweet', those typed 'reply', and every remaining message, which counts as a tweet" + | retweets replies tweets | + retweets := self messages select: [ :message | message type = 'retweet' ]. + replies := self messages select: [ :message | message type = 'reply' ]. 
+ tweets := self messages copyWithoutAll: (retweets, replies ). + ^ Dictionary new + at: 'tweets' put: tweets; + at: 'retweets' put: retweets; + at: 'replies' put: replies; + yourself +] + +{ #category : #utility } +TwitterMessages >> sumSplittedSizes [ + ^ self splitByType sum: [ :each | each size ] +] + +{ #category : #'data queries' } +TwitterMessages >> tweetsSize [ + ^ self sizesByType at: 'tweetsSize'. +] + +{ #category : #utility } +TwitterMessages >> unnecessaryWords [ + ^ #('a' 'amp' 'ante' 'así' 'cc' 'con' 'como' 'cuando' 'de' 'del' 'dentro' 'desde' 'el' 'en' 'En' 'es' 'está' 'ha' 'han' 'hay' 'la' 'La' 'las' 'Las' 'lo' 'los' 'más' 'mi' 'ni' 'No' 'nos' 'o' 'On' 'p' 'para' 'Para' 'por' 'q' 'que' 'quienes' 'quieres' 'RT' 'se' 'sea' 'Si' 'sin' 'son' 'su' 'sus' 'tan' 'tu' 'un' 'una' 'vía' 'we' 'y' 'yo' 'z') + , RTEnglishDictionary new unnecessaryWords +] + +{ #category : #'data queries' } +TwitterMessages >> wordsByFrequencyInTweets [ + | words builder | + self messages ifNil: [ ^ self ]. + words := ''. + self messages do: [ :tweet | + words := words, ' ', (tweet message) ]. + builder := RTNameCloud new. + builder dictionary: self class new. + builder addString: words. + ^ builder sortedAssociations +] + +{ #category : #'data queries' } +TwitterMessages >> wordsByFrequencyInTweetsUpTo: aPercentage [ + "I shown the words by frequency stripped until a percentage of the total size of such words + is reached." + | totalSize copiedSize queryResults | + queryResults := self wordsByFrequencyInTweets. + totalSize := queryResults size. + copiedSize := (totalSize * aPercentage / 100) floor. + ^ queryResults copyFrom: 1 to: copiedSize. 
+ ] diff --git a/src/Dataviz/TwitterMessagesTest.class.st b/src/Dataviz/TwitterMessagesTest.class.st new file mode 100644 index 0000000..c3b5123 --- /dev/null +++ b/src/Dataviz/TwitterMessagesTest.class.st @@ -0,0 +1,20 @@ +" +A TwitterMessagesTest is a test class for testing the behavior of TwitterMessages +" +Class { + #name : #TwitterMessagesTest, + #superclass : #TestCase, + #category : #'Dataviz-Tests' +} + +{ #category : #tests } +TwitterMessagesTest >> testMessagesSplitSize [ + "I test that splitting a collection by type yields subcollections whose sizes + add up to the size of the original collection. I import my sample data from a remote URL." + | testFileUrl testObject | + testFileUrl := 'https://ia801506.us.archive.org/31/items/offrayLC-tweets/tweets/2012_03.js'. + testObject := TwitterMessages new + importFromJSONRemoteFile: testFileUrl. + self assert: + (testObject sumSplittedSizes) equals: testObject messages size +] diff --git a/src/Dataviz/TwitterProfile.class.st b/src/Dataviz/TwitterProfile.class.st index e2bd59e..d4cc760 100644 --- a/src/Dataviz/TwitterProfile.class.st +++ b/src/Dataviz/TwitterProfile.class.st @@ -105,6 +105,17 @@ TwitterProfile >> bio: anObject [ bio := anObject ] +{ #category : #'data scrapping' } +TwitterProfile >> detectJPEGAvatarFrom: aHtmlString [ + "I find the avatar url in a twitter's main page profile and answer true when it ends in '.jpeg' or '.jpg' (case insensitive), false otherwise" + + | avatarUrl | + avatarUrl := ((aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1) attributeAt: 'src'. + avatarUrl := avatarUrl copyReplaceAll: '400x400' with: '200x200'. 
+ ((avatarUrl asLowercase endsWith: '.jpeg') or: (avatarUrl asLowercase endsWith: '.jpg')) + ifTrue: [^ true ] ifFalse: [ ^ false ] +] + { #category : #accessing } TwitterProfile >> favs [ ^ favs @@ -162,12 +173,12 @@ TwitterProfile >> lastTweets: anObject [ lastTweets := anObject ] -{ #category : #'data storage / persistance' } +{ #category : #'data storage / persistence' } TwitterProfile >> loadDataFor: aProfileName fromDatabase: aDataBaseFile [ | db queryResults | "openning connection" - db := NBSQLite3Connection on: aDataBaseFile. + db := UDBCSQLite3Connection on: aDataBaseFile. db open. "Querying the data base" queryResults := (db execute: 'SELECT * FROM profiles WHERE screenName="',aProfileName,'";') rows at: 1. @@ -182,7 +193,7 @@ TwitterProfile >> loadDataFor: aProfileName fromDatabase: aDataBaseFile [ following: (queryResults at: 'following') ] -{ #category : #'data storage / persistance' } +{ #category : #'data storage / persistence' } TwitterProfile >> loadDataFromFile: aFileReference [ "Opens the twitter profile from aFileReference stored in the STON format" | tempProfile | @@ -202,7 +213,7 @@ TwitterProfile >> loadDataFromFile: aFileReference [ url := tempProfile url. ] -{ #category : #'data storage / persistance' } +{ #category : #'data storage / persistence' } TwitterProfile >> loadDataFromUrl: anUrl [ "Opens the twitter profile from aFileReference stored in the STON format" @@ -246,12 +257,12 @@ TwitterProfile >> name: anObject [ name := anObject ] -{ #category : #'data storage / persistance' } +{ #category : #'data storage / persistence' } TwitterProfile >> populateDataBase: aDataBaseFile [ "I populate a SQLite database file with myself data" | db | "openning connection" - db := NBSQLite3Connection on: aDataBaseFile. + db := UDBCSQLite3Connection on: aDataBaseFile. db open. "Creating the data base tweets schema" db execute: @@ -279,7 +290,7 @@ TwitterProfile >> populateDataBase: aDataBaseFile [ db close. 
] -{ #category : #'data storage / persistance' } +{ #category : #'data storage / persistence' } TwitterProfile >> saveToFile: aFileReference [ "Saves the twitter profile to aFileReference in the STON format" @@ -292,7 +303,7 @@ TwitterProfile >> saveToFile: aFileReference [ TwitterProfile >> sayBye [ "Just says hello to all the people which is listening. A dummy example on how to create new messages" Transcript open. - Transcript show: 'Adios, perfil de Twitter, despidiéndose. Pásala bueno ;-)' + Transcript show: 'Adios, perfil de Twitter, despidiéndose. Pásala bueno ;-)' ] @@ -306,6 +317,26 @@ TwitterProfile >> sayHello [ ] +{ #category : #'data scrapping' } +TwitterProfile >> scrapAvatarForProfile: aProfileName [ + "I fetch the twitter page of aProfileName, store the avatar found there in my avatar variable and answer my avatar. When the request fails I only inform the user. + The profile name is the last part of a twitter profile url + (i.e: 'https://twitter.com/aProfileName')." + + | client source anUrl | + anUrl := 'https://twitter.com/', aProfileName. + client := ZnClient new. + client get: anUrl. + client isSuccess + ifTrue:[ + source := Soup fromString: (client) contents asString. + avatar := self scrapAvatarFrom: source. + ] + ifFalse:[self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles']. + ^ self avatar + +] + { #category : #'data scrapping' } TwitterProfile >> scrapAvatarFrom: aHtmlString [ "Finds the avatar in a twitter's main page profile, scales it (200x200), cast it agains different formats (jpeg, png) and returns it" @@ -313,11 +344,8 @@ TwitterProfile >> scrapAvatarFrom: aHtmlString [ | avatarUrl avatarImage | avatarUrl := ((aHtmlString findAllTagsByClass: 'ProfileAvatar-image') at: 1) attributeAt: 'src'. avatarUrl := avatarUrl copyReplaceAll: '400x400' with: '200x200'. - (avatarUrl asLowercase endsWith: '.png') - ifTrue: [avatarImage := ZnEasy getPng: avatarUrl]. 
- ((avatarUrl asLowercase endsWith: '.jpeg') or: (avatarUrl asLowercase endsWith: '.jpg')) - ifTrue: [avatarImage := ZnEasy getJpeg: avatarUrl]. - ^avatarImage. + avatarImage := ImageReadWriter formFromStream: (ZnEasy get: avatarUrl) contents readStream. + ^ avatarImage. ] { #category : #'data scrapping' } @@ -337,7 +365,7 @@ TwitterProfile >> scrapDataForProfile: aProfileName [ totalTweets := self asNumber: (numericalData at: 1). following := self asNumber: (numericalData at: 2). followers := self asNumber: (numericalData at: 3). - favs := self asNumber: (numericalData at: 4). + "favs := self asNumber: (numericalData at: 4)." bio := ((source findAllTagsByClass: 'ProfileHeaderCard-bio') at: 1) next contents. avatar := self scrapAvatarFrom: source. name := ((source findAllTagsByClass: 'ProfileHeaderCard-nameLink') at: 1) next contents. @@ -366,7 +394,7 @@ TwitterProfile >> scrapFollowersForProfile: aProfileName [ numericalData := (source findAllTagsByClass: 'ProfileNav-value') collect:[:each | each text]. followers := self asNumber: (numericalData at: 3). ] - ifFalse:[self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles']. + ifFalse:[self inform: 'Algo salió mal. Verifique su conexión a Internet y que el contenido buscado estén disponibles']. ^ self followers ] @@ -380,7 +408,7 @@ TwitterProfile >> scrapTweetsFromFile: aFile [ source := Soup fromString: aFile contents asString. mostTweets := (source findAllTagsByClass: 'Tweet-text') collect:[:each | each text]. ] - ifFalse:[self inform: 'Algo salió mal. Verifique que el contenido del archivo sea el esperado']. + ifFalse:[self inform: 'Algo salió mal. Verifique que el contenido del archivo sea el esperado']. ^self ] @@ -400,7 +428,7 @@ TwitterProfile >> showWordCloud [ "Creates a tag cloud from tweets of the present twitter profile. 
Is supposed to be run on a Twitter profile which is not empty" | semiRawText tweetsSource uninterestingWords cookedText1 cookedText2 tagView | - uninterestingWords := #( 'ahí' 'al' 'amp' 'ante' 'aquí' 'así' 'bit' 'cc' 'co' 'com' 'como' 'cómo' 'con' 'cual' 'cuando' 'cuándo' 'da' 'de' 'del' 'desde' 'días' 'do' 'el' 'en' 'entre' 'era' 'es' 'esa' 'ese' 'eso' 'esta' 'está' 'estamos' 'están' 'estarán' 'este' 'esto' 'estos' 'estoy' 'fb' 'fbid' 'girará' 'gl' 'gt' 'goo' 'ha' 'han' 'hay' 'he' 'hoy' 'http' 'https' 'hasta' 'la' 'las' 'le' 'les' 'lo' 'los' 'ly' 'mas' 'más' 'mi' 'muy' 'nbsp' 'ni' 'no' 'nos' 'org' 'otros' 'para' 'pero' 'pic' 'por' 'pues' 're' 'se' 'sea' 'ser' 'será' 'si' 'sin' 'sobre' 'solo' 'sólo' 'son' 'soy' 'su' 'sus' 'te' 'this' 'tiene' 'torno' 'tt' 'tu' 'twitter' 'type' 'php' 'pm' 'que' 'qué' 'quot'a'ud' 'un' 'una' 'uno' 'usted' 'utm' 'va' 'van' 'wp' 'www' 'ya' 'yo' 'youtu'). + uninterestingWords := #( 'ahí' 'al' 'amp' 'ante' 'aquí' 'así' 'bit' 'cc' 'co' 'com' 'como' 'cómo' 'con' 'cual' 'cuando' 'cuándo' 'da' 'de' 'del' 'desde' 'días' 'do' 'el' 'en' 'entre' 'era' 'es' 'esa' 'ese' 'eso' 'esta' 'está' 'estamos' 'están' 'estarán' 'este' 'esto' 'estos' 'estoy' 'fb' 'fbid' 'girará' 'gl' 'gt' 'goo' 'ha' 'han' 'hay' 'he' 'hoy' 'http' 'https' 'hasta' 'la' 'las' 'le' 'les' 'lo' 'los' 'ly' 'mas' 'más' 'mi' 'muy' 'nbsp' 'ni' 'no' 'nos' 'org' 'otros' 'para' 'pero' 'pic' 'por' 'pues' 're' 'se' 'sea' 'ser' 'será' 'si' 'sin' 'sobre' 'solo' 'sólo' 'son' 'soy' 'su' 'sus' 'te' 'this' 'tiene' 'torno' 'tt' 'tu' 'twitter' 'type' 'php' 'pm' 'que' 'qué' 'quot'a'ud' 'un' 'una' 'uno' 'usted' 'utm' 'va' 'van' 'wp' 'www' 'ya' 'yo' 'youtu'). mostTweets ifNotNil: [ tweetsSource := mostTweets ] ifNil: [ tweetsSource := lastTweets ]. semiRawText := tweetsSource inject: '' into: [:text :each | text, each asString, ' ' ]. 
diff --git a/src/Dataviz/TwitterProfileOverview.class.st b/src/Dataviz/TwitterProfileOverview.class.st new file mode 100644 index 0000000..f7baf28 --- /dev/null +++ b/src/Dataviz/TwitterProfileOverview.class.st @@ -0,0 +1,292 @@ +" +I represent the overall activity of given twitter profile, in terms of +the kind of messages this profiles emits, being them: tweets, retweets +and replies. +" +Class { + #name : #TwitterProfileOverview, + #superclass : #RTBuilder, + #instVars : [ + 'avatar', + 'messages', + 'screenName', + 'repliedTo', + 'retweetedTo', + 'interactionProfiles', + 'database' + ], + #category : #'Dataviz-Twitter' +} + +{ #category : #accessing } +TwitterProfileOverview >> avatar [ + ^ avatar form +] + +{ #category : #accessing } +TwitterProfileOverview >> avatar: aBitMap [ + avatar := aBitMap +] + +{ #category : #'data visualization' } +TwitterProfileOverview >> avatarWheel [ + "I show the profile avatar surrounded by a wheel representing the proportion between tweets, + retweets and replies of such profile." + | b | + self messages ifEmpty: [^ self]. + b := RTPieBuilder new. + b interaction popup. + b shape current + innerRadius: 150; + externalRadius: 165. + b objects: self messages splitByType values. + b slice: #size. + b normalizer distinctColor. + self avatar ifNotNil: [ b view canvas addShape: (TRBitmapShape new form: self avatar) ]. + ^ b view. +] + +{ #category : #examples } +TwitterProfileOverview >> avatarWheelExample [ + + | o p | + o := self class new. + o screenName: 'ObjectProfile'. + o messages + tweetsSize: 200; + retweetsSize: 70; + repliesSize: 15. + p := TwitterProfile new scrapDataForProfile: o screenName. + o avatar: p avatar. + ^ o avatarWheel +] + +{ #category : #accessing } +TwitterProfileOverview >> database [ + | dataLocation | + (FileLocator documents / 'Grafoscopio') ensureCreateDirectory. + (FileLocator documents / 'Grafoscopio' / 'Projects') ensureCreateDirectory. 
+ (FileLocator documents / 'Grafoscopio' / 'Projects' / 'DataSelfies') ensureCreateDirectory. + dataLocation := FileLocator documents / 'Grafoscopio' / 'Projects' / 'DataSelfies' / 'data-selfies.sqlite'. + database := UDBCSQLite3Connection on: dataLocation fullName. + ^ database +] + +{ #category : #accessing } +TwitterProfileOverview >> database: anObject [ + database := anObject +] + +{ #category : #persistence } +TwitterProfileOverview >> exportInteractionProfilesTo: aFileReference [ + STON put: self interactionProfiles onStreamPretty: aFileReference writeStream +] + +{ #category : #'data visualization' } +TwitterProfileOverview >> frequentTweetedWordsShown: aWorldAmount retweetedProfilesShown: rtProfilesAmount mentionedProfilesShown: mtProfilesAmount [ + "I put all the avatar wheel with tags that come from predefined amounts" + | aColorPalette | + aColorPalette := { Color orange . Color black . Color red }. + ^ self + frequentTweetedWordsShown: aWorldAmount + retweetedProfilesShown: rtProfilesAmount + mentionsProfilesShown: mtProfilesAmount + coloredWith: aColorPalette +] + +{ #category : #'data visualization' } +TwitterProfileOverview >> frequentTweetedWordsShown: aWordsPercentage retweetedProfilesShown: rtProfilesPercentage mentionsProfilesShown: mtProfilesPercentage coloredWith: aColorPalette [ + "I put all the avatar wheel with tags that come from predefined amounts" + | tweetsArc retweetArc mentionsArc tweetsWords retweetedProfiles mentionedProfiles visualWords arcs queries canvasTemp | + self messages ifNil: [ ^ self ]. + tweetsArc := self avatarWheel canvas shapes at: 2. + retweetArc := self avatarWheel canvas shapes at: 1. + mentionsArc := self avatarWheel canvas shapes at: 3. + tweetsWords := self messages wordsByFrequencyInTweetsUpTo: aWordsPercentage. + retweetedProfiles := self messages retweetedProfilesByFrequencyUpTo: rtProfilesPercentage. + mentionedProfiles := self messages mentionedProfilesByFrequencyUpTo: mtProfilesPercentage. 
+ arcs := { tweetsArc . retweetArc . mentionsArc }. + queries := { tweetsWords . mentionedProfiles . retweetedProfiles }. "NOTE(review): arcs are listed as tweets/retweets/mentions but queries as tweets/mentions/retweets, and both are paired by index below - confirm this ordering is intended" + canvasTemp := self avatarWheel canvas. + arcs doWithIndex: [ :arc :i | + visualWords := TPMessages new + visualWordsFrom: (queries at: i) + colored: (aColorPalette at: i). + arc color: (aColorPalette at: i). + arc + surroundedBy: visualWords + radialGap: 85 + angularGap: 3 + renderedIn: canvasTemp. + canvasTemp := canvasTemp + ]. + ^ canvasTemp +] + +{ #category : #'data scrapping' } +TwitterProfileOverview >> getAvatarForProfile: aTwitterProfile [ + "I scrap the avatar image for aTwitterProfile and use it to fill my avatar form. + aTwitterProfile is the twitter's screen name, the string after the '@' character" + self avatar: (TwitterProfile new scrapAvatarForProfile: aTwitterProfile) + + +] + +{ #category : #persistence } +TwitterProfileOverview >> importIProfilesFromDB [ + "I read every row of the interaction_profiles table and add one TwitterInteractionProfile per row to my interactionProfiles. The connection is kept in db because each send of #database answers a newly created connection, so the one that was opened must be the one that gets closed." | query answer db | + query := 'SELECT * FROM interaction_profiles;'. + answer := ((db := self database) open execute: query) rows collect: [ :each | each data ]. + db isOpen ifTrue: [ db close ]. + answer do: [ :each | + self interactionProfiles add: + (TwitterInteractionProfile new + screenName: (each at: 'screenName'); + avatar: (FLMaterializer materializeFromByteArray: (each at: 'avatar')); + tweets: (each at: 'tweets'); + retweets: (each at: 'retweets') + ) + ] +] + +{ #category : #persistence } +TwitterProfileOverview >> importInteractionProfilesFrom: aFileReference [ + "I replace my interactionProfiles with the objects read from the STON contents of aFileReference." self interactionProfiles: (STON fromString: aFileReference contents) +] + +{ #category : #initialization } +TwitterProfileOverview >> initialize [ + super initialize. + avatar := RTBitmap new. + "messages is the declared instance variable; it holds the same kind of object my #messages accessor creates lazily." messages := TwitterMessages new. + screenName := String new. 
+ +] + +{ #category : #accessing } +TwitterProfileOverview >> interactionProfiles [ + ^ interactionProfiles ifNil: [ interactionProfiles := OrderedCollection new ] +] + +{ #category : #accessing } +TwitterProfileOverview >> interactionProfiles: anObject [ + interactionProfiles := anObject +] + +{ #category : #persistence } +TwitterProfileOverview >> materializeIProfilesFrom: aFileReference [ + self interactionProfiles: (FLMaterializer materializationFromFileNamed: aFileReference fullName) root +] + +{ #category : #accessing } +TwitterProfileOverview >> messages [ + ^ messages ifNil: [ messages := TwitterMessages new ] +] + +{ #category : #utility } +TwitterProfileOverview >> putAvatarsOnInteractionProfiles [ + self interactionProfiles size isZero ifTrue: [ ^ self ]. + self interactionProfiles do: [ :each | + each avatar: (TwitterProfile new scrapAvatarForProfile: each screenName) + ] +] + +{ #category : #utility } +TwitterProfileOverview >> putTweetsOnInteractionProfiles [ + + self sortedRetweetedTo keysAndValuesDo: [ :k :v | | ip| + ip := TwitterInteractionProfile new + screenName: k; + tweets: v. + self interactionProfiles add: ip + ]. 
+] + +{ #category : #accessing } +TwitterProfileOverview >> repliedTo [ + ^ repliedTo ifNil: [ repliedTo := OrderedCollection new ] +] + +{ #category : #accessing } +TwitterProfileOverview >> repliedTo: anObject [ + repliedTo := anObject +] + +{ #category : #accessing } +TwitterProfileOverview >> retweetedTo [ + ^ retweetedTo ifNil: [ ^ retweetedTo := OrderedCollection new ] +] + +{ #category : #accessing } +TwitterProfileOverview >> retweetedTo: anObject [ + reposted := anObject +] + +{ #category : #accessing } +TwitterProfileOverview >> screenName [ + ^ screenName +] + +{ #category : #accessing } +TwitterProfileOverview >> screenName: aString [ + screenName := aString +] + +{ #category : #'data queries' } +TwitterProfileOverview >> selectRepliesFrom: aJSONFile [ + | allMessages | + allMessages := NeoJSONReader fromString: (aJSONFile readStream nextLine; upToEnd) contents. + self messages replies: (allMessages select: [ :each | each keys includes: 'in_reply_to_screen_name']). + ^ self messages replies +] + +{ #category : #'data queries' } +TwitterProfileOverview >> selectRetweetsFrom: aJSONFile [ + | allMessages | + allMessages := NeoJSONReader fromString: (aJSONFile readStream nextLine; upToEnd) contents. + self messages retweets: (allMessages select: [ :each | each keys includes: 'retweeted_status']). + ^ self messages retweets +] + +{ #category : #persistence } +TwitterProfileOverview >> serializeIProfilesTo: aFileReference [ + FLSerializer newDefault serialize: self interactionProfiles toFileNamed: aFileReference fullName +] + +{ #category : #'data queries' } +TwitterProfileOverview >> splitMessagesByTypeFrom: aJSONFile [ + ^ self messages splitByType +] + +{ #category : #'data visualization' } +TwitterProfileOverview >> taggedWheelFull [ + "I put all the avatar wheel with tags that come from predefined amounts" + self messages ifNil: [ ^self ]. 
+ self + frequentTweetedWordsShown: self messages tweetsSize + retweetedProfilesShown: self messages retweetsSize + mentionedProfilesShown: self messages repliesSize + +] + +{ #category : #persistence } +TwitterProfileOverview >> updateDBWithInteractionProfiles [ + + + | db | + db := UDBCSQLite3Connection on: self database. + db open. + db execute: + 'CREATE TABLE IF NOT EXISTS interaction_profiles ( + screenName text PRIMARY KEY, + avatar blob, + tweets integer, + retweets integer + );'. + self interactionProfiles do: [ :ip | + db + execute: 'INSERT INTO interaction_profiles values (?, ?, ?, ?);' + with: { ip screenName . ip avatar . ip tweets . ip retweets} + ]. + db close. +] diff --git a/src/Dataviz/TwitterProfileOverviewTest.class.st b/src/Dataviz/TwitterProfileOverviewTest.class.st new file mode 100644 index 0000000..4a0a743 --- /dev/null +++ b/src/Dataviz/TwitterProfileOverviewTest.class.st @@ -0,0 +1,10 @@ +Class { + #name : #TwitterProfileOverviewTest, + #superclass : #TestCase, + #category : #'Dataviz-Twitter' +} + +{ #category : #tests } +TwitterProfileOverviewTest >> testInitializeIsOk [ + self shouldnt: [ TwitterProfileOverview ] raise: Error +]