Extracting information from cranly package networks: extractors and word clouds

Ioannis Kosmidis

2019-07-17

cranly extractor functions

Since version 0.3, cranly includes functions for extracting information from cranly_network objects (see ?extractor-functions). All extractor functions in cranly try to figure out what y is in statements of the form

y is [the] extractor-function a package/author

Let’s download, clean and organize today’s CRAN database, and build the package and author directives networks

# Attach cranly, plus magrittr for the %>% pipe used in the queries below
library("cranly")
library("magrittr")

# Clean and organise the CRAN package database once, then derive both
# networks from the same cleaned copy
cran_db <- clean_CRAN_db()
package_network <- build_network(cran_db, perspective = "package")
author_network <- build_network(cran_db, perspective = "author")

Example queries

Packages by Kurt Hornik

Packages by people named “Ioannis”

Packages with “glm” in their name

Authors of the lubridate package

Authors with “Ioan” in their name

Packages suggested by, imported by and enhanced by the sf package

Packages that are suggesting, importing, enhancing the sf package

# Packages that suggest sf; exact = TRUE presumably matches the name "sf"
# exactly rather than as a pattern -- confirm in ?extractor-functions
package_network %>% suggesting("sf", exact = TRUE)
#>  [1] "adklakedata"     "BIOMASS"         "biscale"        
#>  [4] "c14bazAAR"       "cancensus"       "DeclareDesign"  
#>  [7] "dodgr"           "echor"           "EcoIndR"        
#> [10] "eddi"            "fasterize"       "geojson"        
#> [13] "geometa"         "ggformula"       "ggiraph"        
#> [16] "ggplot2"         "googlePolylines" "GSODR"          
#> [19] "gstat"           "gtfsrouter"      "ipumsr"         
#> [22] "isoband"         "janitor"         "leaflet"        
#> [25] "leafpop"         "lutz"            "mapdeck"        
#> [28] "mlr"             "MODIStsp"        "mudata2"        
#> [31] "NetLogoR"        "nlaR"            "nlgeocoder"     
#> [34] "osmdata"         "pinochet"        "plotly"         
#> [37] "raster"          "rcartocolor"     "sdcSpatial"     
#> [40] "SpaDES.core"     "SpaDES.tools"    "spatialreg"     
#> [43] "spatialwidget"   "spbabel"         "spData"         
#> [46] "stormwindmodel"  "streamDepletr"   "swmmr"          
#> [49] "tabularaster"    "USAboundaries"   "weathercan"
# Packages that import sf (same exact-name query as for suggesting())
package_network %>% importing("sf", exact = TRUE)
#>   [1] "amt"              "areal"            "brazilmaps"      
#>   [4] "btb"              "capm"             "cartogram"       
#>   [7] "cartography"      "cdcfluview"       "compstatr"       
#>  [10] "concaveman"       "crawl"            "crimedata"       
#>  [13] "cyclestreets"     "ebirdst"          "eixport"         
#>  [16] "elevatr"          "EmissV"           "eurostat"        
#>  [19] "FedData"          "fingertipscharts" "foieGras"        
#>  [22] "geogrid"          "geojsonio"        "geonetwork"      
#>  [25] "geoviz"           "ggsn"             "ggspatial"       
#>  [28] "grainchanger"     "GWSDAT"           "hydrolinks"      
#>  [31] "jpmesh"           "jpndistrict"      "kokudosuuchi"    
#>  [34] "LAGOSNE"          "landsepi"         "lconnect"        
#>  [37] "leafem"           "leafpm"           "lidR"            
#>  [40] "linemap"          "link2GI"          "lwgeom"          
#>  [43] "macleish"         "mapedit"          "mapi"            
#>  [46] "mapsapi"          "mapview"          "MODIS"           
#>  [49] "MODISTools"       "moveVis"          "ncdfgeom"        
#>  [52] "nhdR"             "NipponMap"        "NLMR"            
#>  [55] "nlrx"             "oceanis"          "openSTARS"       
#>  [58] "Orcs"             "osrm"             "ows4R"           
#>  [61] "parlitools"       "pct"              "plotdap"         
#>  [64] "qualmap"          "quickmapr"        "RCzechia"        
#>  [67] "readwritesqlite"  "reproducible"     "rgeopat2"        
#>  [70] "rmapshaper"       "rmapzen"          "rnaturalearth"   
#>  [73] "rpostgisLT"       "RPyGeo"           "RQGIS"           
#>  [76] "rSymbiota"        "sabre"            "sfdct"           
#>  [79] "SMITIDstruct"     "smoothr"          "spatialEco"      
#>  [82] "SpatialPosition"  "spatialrisk"      "stats19"         
#>  [85] "stlcsb"           "stplanr"          "sugarbag"        
#>  [88] "tanaka"           "tidycensus"       "tidyRSS"         
#>  [91] "tidytransit"      "tigris"           "tmap"            
#>  [94] "tmaptools"        "trackeRapp"       "transformr"      
#>  [97] "trigpoints"       "uavRmp"           "vein"            
#> [100] "velox"            "windfarmGA"       "wunderscraper"
# Packages that enhance sf (same exact-name query as for suggesting())
package_network %>% enhancing("sf", exact = TRUE)
#> [1] "landscapemetrics" "pointdexter"

Packages that depend on the sf package

Packages that are dependencies of the sf package

Packages maintained by everyone with “Helen” in their name

All available info, on packages maintained by everyone with “Helen” in their name

The maintainer of data.table

The email of the maintainer of trackeRapp

All emails of maintainers using an email address from University of Warwick

The title, the description, the version and the license of the semnar package

Distribution of the release dates of all packages in CRAN

Word clouds

Since version 0.5, cranly provides methods to construct word clouds of either author names, package descriptions or package titles. For example, the word clouds of the descriptions of the packages maintained by me, Achim Zeileis, and Edzer Pebesma are

# One word cloud per maintainer, built from the package network. No perspective
# is given, so these rely on word_cloud()'s default (descriptions, according to
# the accompanying text). min.freq = 1 presumably keeps terms that occur only
# once -- confirm in ?word_cloud
word_cloud(package_network, maintainer = "Ioannis Kosmidis", exact = TRUE, min.freq = 1)

word_cloud(package_network, maintainer = "Achim Zeileis", exact = TRUE, min.freq = 1)

word_cloud(package_network, maintainer = "Edzer Pebesma", exact = TRUE, min.freq = 1)

and the word clouds of the titles of those packages are

# Same three maintainers, now with perspective = "title" so the clouds are
# built from package titles. scale = c(2, 0.1) presumably sets the range of
# word sizes -- confirm in ?word_cloud
word_cloud(package_network, maintainer = "Ioannis Kosmidis", perspective = "title", exact = TRUE,
           scale = c(2, 0.1), min.freq = 1)

word_cloud(package_network, maintainer = "Achim Zeileis", perspective = "title", exact = TRUE,
           scale = c(2, 0.1), min.freq = 1)

word_cloud(package_network, maintainer = "Edzer Pebesma", perspective = "title", exact = TRUE,
           scale = c(2, 0.1), min.freq = 1)

More complex queries can be achieved by using the extractor functions and computing the term frequencies manually. For example, the word cloud of the descriptions of all packages maintained by people with “warwick.ac.uk” in their email is

# Maintainer emails containing "warwick.ac.uk"; flat = FALSE is requested so
# that the result can be indexed by column ($package) on the next line
warwick_emails <- package_network %>% email_with("warwick.ac.uk", flat = FALSE)
warwick_pkgs <- warwick_emails$package

# Match the package names exactly. With exact = FALSE any package whose name
# merely contains one of warwick_pkgs would also contribute its description,
# so the cloud would no longer be limited to the Warwick-maintained packages
descriptions <- package_network %>% description_of(warwick_pkgs, exact = TRUE)

# Compute the term frequencies manually and plot them; min.freq = 1 is the
# same setting used for the other word clouds in this document
term_frequency <- compute_term_frequency(descriptions)
word_cloud(term_frequency, min.freq = 1)