# Source code for bibliometa_vis.visualization

# !/usr/bin/python
# -*- coding: utf-8 -*-

"""This module provides classes and functions to visualize network data on geographical maps."""

import os
import sys

import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

from loguru import logger
from matplotlib.pyplot import text
from shapely.geometry import Point

from bibliometa.config import LOGGING_FORMAT, GRAPH_VISUALISATION_MAP_CONFIG_DEFAULT
from bibliometa.configuration import BibliometaConfiguration
from bibliometa.graph.utils import load_graph, update_graph, get_nodes, get_subgraph, \
    get_graph_attributes, create_pos, add_nodes_from_graph_corpus


class Map(BibliometaConfiguration):
    """The :class:`~bibliometa.graph.visualization.Map` provides functions to visualize geographical network
    data on a map.

    Depending on the entries of ``config.types`` it creates a scatter plot ("scatter"), a map with all cities
    ("cities"), a map with node degrees ("degrees") and/or the network drawn on a map ("map").
    """

    def __init__(self, **kwargs):
        """Construct a new :class:`~bibliometa.graph.visualization.Map`.

        :param kwargs: Configuration values that are passed to the base class together with
            ``GRAPH_VISUALISATION_MAP_CONFIG_DEFAULT``
        """
        super().__init__(GRAPH_VISUALISATION_MAP_CONFIG_DEFAULT, **kwargs)

    def _update_config(self):
        """Load all input data named in the configuration and create the output directories.

        Sets ``self._shp`` (shapefile as GeoDataFrame), ``self._shapes`` (list of shapefile geometries),
        ``self._df`` (coordinates as GeoDataFrame), ``self.graph`` and ``self._nodes``.
        """
        self._shp = MapUtils.read_shp(self.config.shapefile, self.config.shapefile_color, self.config.verbose)
        self._shapes = list(shpreader.Reader(self.config.shapefile).geometries())
        self._df = MapUtils.convert_to_gdf(self.config.coordinates, self.config.crs, self.config.coordinates_sep)
        self.graph = load_graph(self.config, reload=False)

        # create dictionary of nodes and their node degrees
        self._nodes = get_nodes(self.graph, self.config, encoding=self.config.encoding)

        # exist_ok avoids the race between checking for a directory and creating it
        os.makedirs(self.config.o, exist_ok=True)
        os.makedirs(self.config.graphml, exist_ok=True)

    def start(self):
        """Start visualization.

        Sets up logging, loads the input data and creates every plot type listed in ``config.types``.
        """
        # Set up logging: remove the loguru sinks registered so far, then log to file and,
        # if verbose, additionally to stderr
        logger.remove()
        logger.add(self.config.log, format=LOGGING_FORMAT, level=self.config.log_level_file)
        if self.config.verbose:
            logger.add(sys.stderr, level=self.config.log_level_std)

        logger.info("Start network creation.")
        self._update_config()

        # plot all cities from city file as scatter plot
        if "scatter" in self.config.types:
            logger.info("Creating scatterplot.")
            Plotting.scatter(self._df, self.config)

        # plot all cities from city file on map
        if "cities" in self.config.types:
            logger.info("Plotting all cities.")
            Plotting.cities(self._df, self._shp, self.config)

        # plot nodes on map
        if "degrees" in self.config.types:
            logger.info("Plotting node degrees.")
            Plotting.degrees(self._df, self._shp, self._nodes, self.config)

        # add nodes with degree == 0 if desired
        if self.config.all_nodes:
            self.graph = add_nodes_from_graph_corpus(self.graph,
                                                     self.config.graph_corpus,
                                                     self.config.singletons,
                                                     encoding=self.config.encoding)

        # remove unneeded nodes from the graph
        update_graph(self.graph, self._df, self.config.keys_labels[0])

        # get largest component as subgraph and plot on map
        if "map" in self.config.types:
            logger.info("Plotting network on map.")
            # with ``config.components`` set, the complete graph is drawn instead of a subgraph
            subgraph = self.graph if self.config.components else get_subgraph(self.graph)
            pos, remove = create_pos(subgraph,
                                     self._df,
                                     self.config.keys_labels[0],
                                     self.config.map_extent,
                                     self.config.verbose)
            # nodes reported by create_pos are dropped before drawing
            subgraph.remove_nodes_from(remove)
            attributes = get_graph_attributes(subgraph)
            Plotting.graph_on_map(self._shapes, subgraph, attributes, pos, self._df, self.config)

        logger.info("Network creation finished.")
        logger.info(f"Logfile written to {self.config.log}")
class Plotting:
    """The :class:`~bibliometa.graph.visualization.Plotting` provides functions to plot network data."""

    @staticmethod
    def _save_figure(fig, config, kind):
        """Save a figure once per configured output format.

        The files are written to ``{config.o}{kind}/{config.name}.{ext}`` for every ``ext`` in
        ``config.o_formats``; missing directories are created.

        :param fig: Figure to save (any object with a matplotlib-compatible ``savefig`` method)
        :param config: Configuration object providing ``o``, ``name`` and ``o_formats``
        :type config: `bibliometa.configuration.Config`
        :param kind: Name of the subdirectory for this plot type, e.g. "scatter"
        :type kind: `str`
        """
        for ext in config.o_formats:
            _path = f"{config.o}{kind}/{config.name}.{ext}"
            # exist_ok avoids the race between checking for a directory and creating it
            os.makedirs(os.path.dirname(_path), exist_ok=True)
            fig.savefig(_path, bbox_inches='tight')

    @staticmethod
    def scatter(df, config):
        """Create scatter plot.

        :param df: DataFrame with 'lng' and 'lat' column
        :type df: `pandas.DataFrame`
        :param config: Configuration object
        :type config: `bibliometa.configuration.Config`
        """
        plot = plt.scatter(x=df['lng'], y=df['lat'])

        if config.verbose:
            plt.show()

        Plotting._save_figure(plot.get_figure(), config, "scatter")

        # release the pyplot state only after all formats have been written
        plt.clf()
        plt.close('all')

    @staticmethod
    def cities(df, shp, config):
        """Plot cities on a certain map.

        :param df: DataFrame with city information
        :type df: `pandas.DataFrame`
        :param shp: Shapefile
        :type shp: GeoDataFrame
        :param config: Configuration object
        :type config: `bibliometa.configuration.Config`
        """
        # draw the shapefile first and put the cities on the same axes
        base = shp.plot(figsize=config.figsize, marker='o', color=config.shapefile_color, markersize=45)
        plot = df.plot(ax=base, aspect=1)

        if config.verbose:
            plt.show()

        Plotting._save_figure(plot.get_figure(), config, "cities")

        # release the pyplot state only after all formats have been written
        plt.clf()
        plt.close('all')

    @staticmethod
    def degrees(df, shp, nodes, config):
        """Plot graph with node degrees.

        Every node that occurs in the label column (``config.keys_labels[1]``) of ``df`` is drawn with its
        value from ``nodes`` as marker size and annotated with its label.

        :param df: DataFrame with city information
        :type df: `pandas.DataFrame`
        :param shp: Shapefile
        :type shp: GeoDataFrame
        :param nodes: Graph nodes with their degrees
        :type nodes: `dict`
        :param config: Configuration object
        :type config: `bibliometa.configuration.Config`
        """
        label_column = config.keys_labels[1]

        fig, ax = plt.subplots(figsize=config.figsize)
        shp.plot(ax=ax, alpha=0.6, color=config.shapefile_color)

        # build the lookup once instead of converting the column for every node
        known_labels = set(df[label_column].tolist())
        for city, degree in nodes.items():
            if city in known_labels:
                df[df[label_column] == city].plot(ax=ax,
                                                  markersize=degree,
                                                  color=config.degree_node_color,
                                                  marker="o")

        # annotate from the same column that was used for matching above
        for x, y, label in zip(df.geometry.x, df.geometry.y, df[label_column]):
            if label in nodes:
                ax.annotate(label, xy=(x, y), xytext=(3, 3), textcoords="offset points")

        if config.verbose:
            plt.show()

        Plotting._save_figure(fig, config, "degrees")

        # release the pyplot state only after all formats have been written
        plt.clf()
        plt.close('all')

    @staticmethod
    def graph_on_map(shapes, subgraph, attributes, pos, df, config):
        """Plot graph on a map.

        :param shapes: An iterator of shapely geometries from a shapefile
        :type shapes: `list`
        :param subgraph: Largest component of graph
        :type subgraph: `networkx.Graph`
        :param attributes: Dictionary of graph degrees, labels and sizes
        :type attributes: `dict`
        :param pos: Dictionary of node positions
        :type pos: `dict`
        :param df: DataFrame with coordinates
        :type df: `pandas.DataFrame`
        :param config: Configuration object
        :type config: `bibliometa.configuration.Config`
        """
        crs = ccrs.PlateCarree()
        fig, ax = plt.subplots(1, 1, figsize=config.figsize, subplot_kw=dict(projection=crs))
        ax.add_geometries(shapes, crs, edgecolor='black', facecolor=config.shapefile_color, alpha=0.2)
        ax.coastlines()

        if config.map_extent == "global":
            ax.set_global()
        else:
            ax.set_extent(config.map_extent)

        # TODO: Implement edge widths that depend on the edge weights
        nx.draw_networkx(subgraph,
                         ax=ax,
                         alpha=.5,
                         width=config.edge_width,
                         node_size=attributes["sizes"],
                         # NOTE: networkx draws ``labels`` only if ``with_labels`` is True, so this argument
                         # should have no effect here; the labels are added manually below
                         labels=attributes["labels"],
                         pos=pos,
                         with_labels=False)

        # add labels
        key_column, label_column = config.keys_labels[0], config.keys_labels[1]
        for node, (x, y) in pos.items():
            try:
                # the font size grows with the node degree, limited by max_fontsize (if set)
                fs = config.fontsize
                if attributes["degrees"][node] > config.fontsize:
                    fs = attributes["degrees"][node]
                if config.max_fontsize and fs > config.max_fontsize:
                    fs = config.max_fontsize
                label = df[df[key_column] == node].iloc[0, df.columns.get_loc(label_column)]
                ax.text(x, y, label, fontsize=fs, ha='center', va='center')
            except Exception as exc:
                # labelling is best effort: a node without a usable label is skipped, but not silently
                logger.warning("Could not add label for node {}: {}", node, exc)

        if config.verbose:
            plt.show()

        Plotting._save_figure(fig, config, "network")

        # release the pyplot state only after all formats have been written
        plt.clf()
        plt.close('all')
class MapUtils:
    """The :class:`~bibliometa.graph.visualization.MapUtils` provides utilities for the
    :class:`~bibliometa.graph.visualization.Map` class.
    """

    @staticmethod
    def read_shp(f, color="grey", verbose=False):
        """Read shapefile.

        :param f: Path to shapefile
        :type f: `str`
        :param color: Color of shapefile background (currently unused, kept for compatibility)
        :type color: `str`
        :param verbose: Verbose parameter (currently unused, kept for compatibility)
        :type verbose: `bool`
        :return: Shapefile
        :rtype: GeoDataFrame
        """
        return gpd.read_file(f)

    @staticmethod
    def convert_to_gdf(csv_input, crs, csv_sep):
        """Convert long/lat to Point objects.

        :param csv_input: Path to CSV input file with long/lat information ('lng' and 'lat' columns)
        :type csv_input: `str`
        :param crs: Coordinate Reference System
        :type crs: `str`
        :param csv_sep: CSV separator in input file
        :type csv_sep: `str`
        :return: Content of the CSV file with an additional geometry column
        :rtype: GeoDataFrame
        :raises ValueError: If the CSV content cannot be converted to a GeoDataFrame
        """
        orig_df = pd.read_csv(csv_input, sep=csv_sep)
        # work on a copy so that the imported data can be logged unchanged afterwards
        df = orig_df.copy()

        try:
            geometry = [Point(xy) for xy in zip(df["lng"], df["lat"])]
            df = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
        except Exception as e:
            # chain the original exception so that its traceback is not lost
            raise ValueError(f"Could not convert import data from {csv_input} to GeoDataFrame. Exception: {e}") from e

        logger.info(f"Successfully converted import data from {csv_input} to GeoDataFrame.")
        logger.debug(f"Imported city data:\n{orig_df.head()}")
        logger.debug(f"GeoDataFrame:\n{df.head()}")
        return df