# Source code for sattoolbox.plots.raw_material.jz_plots

# -*- coding: utf-8 -*-
"""
Created on Tue May 28 10:01:19 2024

@author: user
"""

import warnings
from typing import Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# [docs]
def plot_curves(
        data,
        t_start=None,
        t_delta=None,
        resample=None,
        rolling=None,
        variables_ax1: Optional[list]= None,
        variables_ax2: Optional[list]= None,
        ylim_ax1 = None,
        ylim_ax2 = None,
        colorsdict = 'black',
        stylesdict = 'solid',
        alpha = 1,
        cycler_ax1=None,
        cycler_ax2=None,
        layout = 'constrained',
        layout_kwargs: Optional[dict] = None,
        title = None,
        ylabel_ax1 = None,
        ylabel_ax2 = None,
        xlabel = None,
        annotate_curves=False,
        figsize = (10,6),
        **kwargs
        ):

    """ Plot curves from a DataFrame on a primary and a secondary y-axis.

        Features:
        - Plot curves on primary or secondary axis (specified via
          "variables_ax1" and "variables_ax2", or inferred automatically from
          the column names if neither is given).
        - Use dictionaries to define colors and styles for variables. This is
          useful to ensure that the same physical quantities are plotted in the
          same color and the same units / positions of measurement / ... in the
          same style in the figure. Alternatively provide a plt.cycler
          directly (which overrides the color and style dictionaries).
        - (optional) select the time range used for plotting via t_start and
          t_delta
        - (optional) resample data (mean value per period, labelled at the
          centre of the period)
        - (optional) apply a rolling mean to the data
        - (optional) annotation of curves (this functionality probably needs
          improvement for better placement)

        Parameters
        ----------
        data : pd.DataFrame
            DataFrame with the curves to plot; its index is used as x-axis.
        t_start : datetime.datetime
            Starting time of the plot. Default=None (then the plot starts at
            data.index.min()). A value before the first index entry is clipped
            to data.index.min().
        t_delta : datetime.timedelta or str
            Range of the time axis, either a timedelta or a string that
            pd.Timedelta accepts (e.g. '7D'). Default=None (then the plot goes
            up to data.index.max()). The end is clipped to data.index.max().
        resample : string
            Period for resampling the data (mean), default=None. Must be a
            valid pandas frequency string (e.g. 'D', 'h' or '15min'; older
            pandas versions also accept 'H' and '15T').
        rolling : string
            Window for the rolling mean, default=None. Must be a valid pandas
            window string.
        variables_ax1 : list of strings
            Variables to be plotted on the primary axis, default None (= []).
        variables_ax2 : list of strings
            Variables to be plotted on the secondary axis, default None (= []).
            If both lists are empty, the columns of data are grouped by the
            known physical quantities: with exactly two quantities (and no
            unmatched column) the first one found goes to the primary and the
            second one to the secondary axis; otherwise all columns are
            plotted on the primary axis.
        ylim_ax1 : tuple of numbers
            Limits of the primary y-axis.
        ylim_ax2 : tuple of numbers
            Limits of the secondary y-axis.
        colorsdict : dict {'variable_name':'color'} or str
            Single color string or dict of colors to be used for plotting the
            curves. Keys are matched against the start of the variable names
            (longest key wins); variables without a matching key are drawn in
            'black'. Default='black'
        stylesdict : dict {'unit_name':'style'} or str
            Single style string or dict of styles to be used for plotting the
            curves. Keys are matched against the end of the variable names
            (longest key wins); variables without a matching key are drawn
            'solid'. Default='solid'
        alpha : float
            Number in [0;1] to indicate transparency of curves.
        cycler_ax1 : Cycler
            Cycler object for colors and style properties on the primary axis.
            Can be constructed via matplotlib.pyplot.cycler(color=[...],
            linestyle=[...]). Using this option overrides colorsdict,
            stylesdict and alpha for the curves (annotations still use the
            colors derived from colorsdict).
        cycler_ax2 : Cycler
            Cycler object for the secondary axis.
        layout : str
            Layout option for matplotlib.figure.Figure.set_layout_engine().
            Default is 'constrained'; None leaves the layout engine untouched.
        layout_kwargs : dict
            Additional keyword arguments to be passed to matplotlib.figure.
            Figure.set_layout_engine(). Default is None (= {}).
        title : str
            Title used as supertitle of the figure. Default is None.
        ylabel_ax1 : str
            Label for the primary axis. Default is None, which lets this
            function try to infer the label from the variable names.
        ylabel_ax2 : str
            Label for the secondary axis. Default is None, which lets this
            function try to infer the label from the variable names.
        xlabel : str
            Label for the x-axis of the plot. Default is None, which means no
            label will be given to the x-axis.
        annotate_curves : Boolean
            Whether curve labels shall be added as annotation to the curves.
            Default=False
        figsize : tuple of numbers
            Size of the figure. Default is (10,6).
        kwargs : dict
            Additional keyword arguments forwarded to DataFrame.plot for both
            axes, default={}

        Returns
        -------
        plot_data : pd.DataFrame
            All data in the plot (after time selection, resampling and
            rolling mean).
        fig : matplotlib.figure.Figure
            Figure that was drawn

        Notes
        -----
        The rolling mean is only of limited use, because it dampens individual
        peaks but still contains them. Resampling is usually the better
        choice: if dynamic effects balance out over a day, the daily values
        are then "smooth".
    """

    # Determine the time window, clipped to the range covered by data
    if isinstance(t_delta, str):
        t_delta = pd.Timedelta(t_delta)

    t_start = data.index.min() if (t_start is None) else max(t_start, data.index.min())
    t_end = data.index.max() if (t_delta is None) else min(t_start+t_delta, data.index.max())

    # Try to place variables on two axes, if not explicitly given
    # => if exactly two different known physical quantities
    # => otherwise: all on first axis + warning if more than 2 quantities
    variables_ax1 = [] if variables_ax1 is None else list(variables_ax1)
    variables_ax2 = [] if variables_ax2 is None else list(variables_ax2)

    if not variables_ax1 and not variables_ax2:
        variables_ax1, variables_ax2 = _assign_axes_by_quantity(list(data.columns))

    # copy() so that the row assignments below never write into (a view of) data
    plot_data = data.loc[t_start:t_end, variables_ax1+variables_ax2].copy()

    if len(plot_data.index) < 2 and len(data.index) == 2:
        print('Data to plot seems to be parameters that are constant over time')
        plot_data.loc[t_start,:] = data.iloc[0]
        plot_data.loc[t_end,:] = data.iloc[1]

    if resample is not None:
        plot_data = _resample_to_bin_centres(plot_data, resample)

    if rolling is not None:
        plot_data = plot_data.rolling(rolling, closed='neither').mean()

    # Define colors (always computed, they are also needed for annotations)
    if not isinstance(colorsdict, (dict, str)):
        warnings.warn("Parameter 'colorsdict' must be either a dict like {variable:color}" +
                      " or a string indicating a color. You specified " + str(colorsdict) +
                      ". Default color 'black' is used for all curves.")
    colors_ax1 = _colors_for(colorsdict, variables_ax1)
    colors_ax2 = _colors_for(colorsdict, variables_ax2)

    # Define styles
    styles_ax1 = _styles_for(stylesdict, variables_ax1)
    styles_ax2 = _styles_for(stylesdict, variables_ax2)

    # infer ylabels (if not given)
    ylabel_ax1 = _infer_ylabel(ylabel_ax1, variables_ax1)
    ylabel_ax2 = _infer_ylabel(ylabel_ax2, variables_ax2)

    # set up figure and axes
    fig, ax1 = plt.subplots(figsize=figsize)
    if layout is not None:
        if layout_kwargs is None:
            layout_kwargs = {}
        fig.set_layout_engine(layout=layout, **layout_kwargs)
    ax2 = ax1.twinx()

    # plot variables on ax1
    if variables_ax1:
        if cycler_ax1 is None:
            cycler_ax1 = plt.cycler(linestyle=styles_ax1,
                                    color=colors_ax1,
                                    alpha=[alpha]*len(variables_ax1))
        ax1.set_prop_cycle(cycler_ax1)
        plot_data.plot(y=variables_ax1, ax=ax1, **kwargs)

    # plot variables on ax2
    if variables_ax2:
        if cycler_ax2 is None:
            cycler_ax2 = plt.cycler(linestyle=styles_ax2,
                                    color=colors_ax2,
                                    alpha=[alpha]*len(variables_ax2))
        ax2.set_prop_cycle(cycler_ax2)
        plot_data.plot(y=variables_ax2, ax=ax2, **kwargs)

    # annotate curves (positions at 20 % / 80 % of the plotted rows, so that
    # the labels of the two axes do not sit on top of each other)
    if annotate_curves and len(plot_data) > 0:
        _annotate_axis(ax1, plot_data, variables_ax1, colors_ax1, 0.2)
        _annotate_axis(ax2, plot_data, variables_ax2, colors_ax2, 0.8)

    # set y limits
    if ylim_ax1 is not None:
        ax1.set_ylim(ylim_ax1)
    if ylim_ax2 is not None:
        ax2.set_ylim(ylim_ax2)

    # add separate legends for primary and secondary y-axis; an axis without
    # curves gets no legend (it would only be an empty titled box)
    if ax1.lines:
        ax1.legend([line.get_label() for line in ax1.lines],
                   title='Variables on prim. axis', loc='upper left')
    if ax2.lines:
        ax2.legend([line.get_label() for line in ax2.lines],
                   title='Variables on sec. axis', loc='upper right')

    # add horizontal labels for primary and secondary y-axis
    label_kwargs = {'rotation':'horizontal', 'rotation_mode':"anchor",
                    'verticalalignment':'baseline', 'ha':'left'}
    ax1.set_ylabel(ylabel_ax1, **label_kwargs)
    ax1.yaxis.set_label_coords(-0.05, 1.03)

    ax2.set_ylabel(ylabel_ax2, **label_kwargs)
    ax2.yaxis.set_label_coords(1.05, 1.03)

    # add label for x-axis
    ax1.set_xlabel(xlabel)

    # add title
    fig.suptitle(title)

    return plot_data, fig


def _assign_axes_by_quantity(variables):
    """Split variables into (primary, secondary) lists by physical quantity.

    Variables are grouped by the longest matching column name of
    _physical_quantities, in order of first appearance (deterministic, unlike
    a set). Only if exactly two known quantities occur and every variable
    matched one of them are the two groups returned; otherwise all variables
    go to the primary axis.
    """
    groups = {}
    for variable in variables:
        quantity = _find_longest_match(_physical_quantities.columns, variable)
        groups.setdefault(quantity, []).append(variable)

    known = [quantity for quantity in groups if quantity is not None]
    if len(groups) == 2 and len(known) == 2:
        return groups[known[0]], groups[known[1]]

    if len(known) > 2:
        warnings.warn("Found more than two physical quantities (" + ", ".join(known) +
                      "); all variables are plotted on the primary axis.")
    return list(variables), []


def _resample_to_bin_centres(frame, rule):
    """Resample frame to period means, labelled at the centre of each period.

    Replaces the former resample(..., loffset=...) call; the 'loffset'
    argument no longer exists in pandas >= 2.0.
    """
    resampled = frame.resample(rule).mean()
    try:
        half_period = pd.tseries.frequencies.to_offset(rule)/2
    except (TypeError, ValueError):
        # offsets without a fixed length cannot be halved
        warnings.warn("Cannot centre the labels for resampling period '" + str(rule) +
                      "'; the labels returned by pandas are kept.")
        return resampled
    resampled.index = resampled.index + half_period
    return resampled


def _colors_for(colorsdict, variables):
    """Return one color per variable; fall back to 'black' where none is defined."""
    if isinstance(colorsdict, str):
        return [colorsdict]*len(variables)
    if not isinstance(colorsdict, dict):
        return ['black']*len(variables)

    colors = []
    for variable in variables:
        key = _find_longest_match(colorsdict.keys(), variable)
        if key is None:
            warnings.warn("No entry in 'colorsdict' matches variable '" + str(variable) +
                          "'. Default color 'black' is used.")
            colors.append('black')
        else:
            colors.append(colorsdict[key])
    return colors


def _styles_for(stylesdict, variables):
    """Return one line style per variable; fall back to 'solid' where none is defined."""
    if isinstance(stylesdict, str):
        return [stylesdict]*len(variables)
    if not isinstance(stylesdict, dict):
        return ['solid']*len(variables)

    styles = []
    for variable in variables:
        key = _find_longest_match(stylesdict.keys(), variable, mode='end')
        styles.append('solid' if key is None else stylesdict[key])
    return styles


def _annotate_axis(ax, plot_data, variables, colors, rel_position):
    """Label every curve of one axis at the row at rel_position (0..1) of plot_data."""
    if not variables:
        return
    position = plot_data.index[int(rel_position*len(plot_data))]
    values = plot_data.loc[:, variables].to_numpy()
    # nan-aware: rolling/resampling can leave NaN rows, which would otherwise
    # turn the text offset (and thereby the label position) into NaN
    text_offset = 0.1*(np.nanmax(values)-np.nanmin(values))
    for variable, color in zip(variables, colors):
        anchor = plot_data.loc[position, variable]
        ax.annotate(variable,
            xy = (position, anchor),
            xytext = (position, anchor+text_offset),
            color = color,
            arrowprops={"arrowstyle":"-", "connectionstyle":"arc3", "color" : color}
            )



### Helper functions

def _find_longest_match(searchlist, match, mode='start'):
    """
    Find the longest element of searchlist that match starts (or ends) with.

    Parameters
    ----------
    searchlist : iterable of str
        candidate strings (e.g. a list, dict keys or a pandas Index).
    match : str
        string that is compared against the candidates.
    mode : str
        'start' to accept candidates that match starts with, 'end' to accept
        candidates that match ends with.

    Returns
    -------
    item : str
        longest matching candidate if any, None otherwise.

    Raises
    ------
    ValueError
        if mode is neither 'start' nor 'end'. This is checked before the
        search, so it is also raised for an empty searchlist.

    """
    if mode == 'start':
        is_match = match.startswith
    elif mode == 'end':
        is_match = match.endswith
    else:
        raise ValueError(f"'mode' must be one of 'start' or 'end'. You provided '{mode}'.")

    # Two different candidates of equal length can never both be a prefix
    # (or suffix) of the same string, so the longest hit is unique.
    return max((item for item in searchlist if is_match(item)), key=len, default=None)


# Lookup table of known physical quantities.
# Columns: variable-name prefixes (matched against the start of variable names
#          via _find_longest_match).
# Rows:    'label'  - axis label text (used as default labels by _infer_ylabel)
#          'color'  - a color name per quantity
#          'symbol' - formula symbol in mathtext notation
#          'unit'   - unit string
_physical_quantities = pd.DataFrame(data={
    'T' : ['Temperature T in °C', 'r', r'$T_{SL}$', '°C'],
    # '\Delta T' needs the space: '\DeltaT' would be read as one (undefined) command
    'dT' : [r'temperature difference $\Delta$T in °C', 'r', r'$\Delta T$', '°C'],
    'p' : ['pressure p in bar', 'grey', '$p$', 'bar'],
    'dp' : [r'differential pressure $\Delta$p [bar]', 'lightgrey', r'$\Delta$p', 'bar'],
    'm_flow' : [r'mass flow $\dot{m}$ in kg/s', 'g', r'$\dot{m}$', 'kg/s'],
    'Q_flow' : [r'heat flow $\dot{Q}$ in W', 'orange', r'$\dot{Q}$', 'W'],
    },
    index=['label', 'color', 'symbol', 'unit'])

def _infer_ylabel(ylabel, variables, ylabel_dict=_physical_quantities.loc['label',:].to_dict()):
    """
    Return ylabel, or try to infer a label from the variable names if it is None.

    Parameters
    ----------
    ylabel : str
        ylabel string for this axis; returned unchanged if it is not None.
    variables : list
        list of variables to be plotted on this axis
    ylabel_dict : dict
        dictionary {variable-name prefix: label} of predefined labels;
        defaults to the 'label' row of _physical_quantities.

    Returns
    -------
    ylabel : str
        the given ylabel if it was not None (or if variables is empty);
        otherwise the label from ylabel_dict, if all variables that match a
        key match the same one; None if no key or more than one key matches.

    """

    if ylabel is not None or len(variables) == 0:
        return ylabel

    warnings.warn("You did not pass a label name for this axis. " +
                  "Trying to infer a label name, please check if it is correct.")

    # distinct matching keys, in order of first appearance
    matches = []
    for variable in variables:
        this_match = _find_longest_match(ylabel_dict.keys(), variable)
        if (this_match is not None) and this_match not in matches:
            # append, not '+=': extending a list by a string adds its characters
            matches.append(this_match)

    if len(matches) == 0:
        warnings.warn("Unable to infer label name, returning None.")
        return None
    if len(matches) > 1:
        warnings.warn("Found more than one possible label for this y-axis,"+
                      " returning None.")
        return None
    # return the label text, not the key that was matched
    return ylabel_dict[matches[0]]

# Finally an example    

# [docs]
def example():
    """
    Demonstrate plot_curves on one synthetic year of hourly data.

    Six curves (four temperatures, two mass flows) are built from a yearly
    and a daily sine wave. plot_curves is then called twice: once with only
    the color and style dictionaries, and once with most options set
    explicitly (7-day window, explicit axis assignment, labels, limits and
    curve annotations).

    Presumably reachable as sattoolbox.plots.raw_material.jz_plots.example()
    -- confirm against the package layout.

    Returns
    -------
    None.

    """

    # colors are looked up by variable-name prefix, styles by suffix
    color_by_prefix = {'T_SL': 'red', 'T_RL': 'blue', 'm_flow': 'green'}
    style_by_suffix = {'1': 'solid', '2': 'dashed'}

    # =============================================================================
    #     Line Plot Example
    # =============================================================================
    print("Testing plot_curves...")
    print("Creating data")
    phase = np.linspace(-np.pi, np.pi, 8760)
    seasonal = np.sin(phase)
    daily = np.sin(phase*365)

    timestamps = pd.date_range(start="2023-01-01", end="2023-12-31 23:00", freq="1h")

    # curve name -> (offset, amplitude of yearly wave, amplitude of daily wave)
    curve_spec = {
        "T_SL_1": (70, 15, 1),
        "T_SL_2": (60, 10, 1),
        "T_RL_1": (40, 2, 2),
        "T_RL_2": (20, 10, 5),
        'm_flow_1': (10, 3, 5),
        'm_flow_2': (11, 4, 1),
    }
    frame = pd.DataFrame(
        data={
            name: offset+amp_year*seasonal+amp_day*daily
            for name, (offset, amp_year, amp_day) in curve_spec.items()
        },
        index=timestamps)
    print("Data creation complete")

    print("Creating Example plots")

    # example with only minimum settings
    plot_curves(frame, colorsdict=color_by_prefix, stylesdict=style_by_suffix)

    # example with nearly everything defined
    detailed_options = {
        'figsize': (6,6),
        't_start': frame.index[0],
        't_delta': "7D",
        'variables_ax1': ['T_SL_1', 'T_SL_2', 'T_RL_1', 'T_RL_2'],
        'variables_ax2': ['m_flow_1', 'm_flow_2'],
        'colorsdict': color_by_prefix,
        'stylesdict': style_by_suffix,
        'alpha': 0.7,
        'title': "Example plot with temperatures and mass flows at two different places",
        'xlabel': 'Time',
        'ylim_ax1': [0,100],
        'ylim_ax2': [0,20],
        'ylabel_ax1': 'Temperature in °C',
        'ylabel_ax2': 'Mass flow in kg/s',
        'annotate_curves': True,
    }
    plot_curves(frame, **detailed_options)