Skip to content

Plot

Plot Class#

statista.plot.Plot #

Visualization utilities for statistical distributions and analyses.

This class provides static methods for creating various types of statistical plots including probability density functions (PDF), cumulative distribution functions (CDF), detailed distribution plots, and confidence interval visualizations.

All methods return matplotlib Figure and Axes objects, allowing for further customization if needed before saving or displaying the plots.

Examples:

  • Generate some sample data:
    >>> import numpy as np
    >>> from statista.plot import Plot
    >>> from statista.distributions import Normal
    >>> data = np.random.normal(loc=10, scale=2, size=100)
    
  • Fit a normal distribution:
    >>> normal_dist = Normal(data)
    >>> normal_dist.fit_model()
    -----KS Test--------
    Statistic = 0.09
    Accept Hypothesis
    P value = 0.8154147124661313
    {'loc': np.float64(9.876997051725278), 'scale': np.float64(2.010896054339655)}
    
  • Generate points for plotting:
    >>> x = np.linspace(min(data), max(data), 10000)
    >>> pdf_values = normal_dist.pdf(data=x)
    
  • Create a PDF plot:
    >>> fig, ax = Plot.pdf(x, pdf_values, data)
    
    PDF Plot Example
Source code in statista/plot.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
class Plot:
    """Visualization utilities for statistical distributions and analyses.

    This class provides static methods for creating various types of statistical plots
    including probability density functions (PDF), cumulative distribution functions (CDF),
    detailed distribution plots, and confidence interval visualizations.

    All methods return matplotlib Figure and Axes objects, allowing for further customization
    if needed before saving or displaying the plots.

    Examples:
        - Generate some sample data:
            ```python
            >>> import numpy as np
            >>> from statista.plot import Plot
            >>> from statista.distributions import Normal
            >>> data = np.random.normal(loc=10, scale=2, size=100)

            ```
        - Fit a normal distribution:
            ```python
            >>> normal_dist = Normal(data)
            >>> normal_dist.fit_model()
            -----KS Test--------
            Statistic = 0.09
            Accept Hypothesis
            P value = 0.8154147124661313
            {'loc': np.float64(9.876997051725278), 'scale': np.float64(2.010896054339655)}
            ```
        - Generate points for plotting:
            ```python
            >>> x = np.linspace(min(data), max(data), 10000)
            >>> pdf_values = normal_dist.pdf(data=x)
            ```
        - Create a PDF plot:
            ```python
            >>> fig, ax = Plot.pdf(x, pdf_values, data)

            ```
            ![PDF Plot Example](./../_images/plot/plot-pdf.png)
    """

    def __init__(self):
        pass

    @staticmethod
    def pdf(
        qx: np.ndarray,
        pdf_fitted,
        data_sorted: np.ndarray,
        fig_size: Tuple[float, float] = (6, 5),
        xlabel: str = "Actual data",
        ylabel: str = "pdf",
        fontsize: int = 11,
    ) -> Tuple[Figure, Axes]:
        """Plot a fitted probability density curve over a histogram of the data.

        The fitted PDF is drawn as a line and the observations as a
        density-normalised histogram on the same axes, so the two can be compared
        visually. The figure is displayed with ``plt.show()`` before it is returned.

        Args:
            qx: x-values at which the fitted PDF was evaluated, typically a linspace
                between the min and max of the actual data.
            pdf_fitted: PDF values, one for each point in qx. Usually obtained from
                a distribution's pdf method.
            data_sorted: The actual data drawn as the histogram.
            fig_size: Figure size as (width, height) in inches. Defaults to (6, 5).
            xlabel: Label for the x-axis. Defaults to "Actual data".
            ylabel: Label for the y-axis. Defaults to "pdf".
            fontsize: Font size used for both axis labels. Defaults to 11.

        Returns:
            tuple: A tuple containing:
                - Figure: The matplotlib Figure object
                - Axes: The matplotlib Axes object containing the plot

        Examples:
            - Generate some sample data:
                ```python
                >>> import numpy as np
                >>> from statista.plot import Plot
                >>> from statista.distributions import Normal
                >>> data = np.random.normal(loc=10, scale=2, size=100)

                ```
            - Fit a normal distribution:
                ```python
                >>> normal_dist = Normal(data)
                >>> normal_dist.fit_model() # doctest: +SKIP
                -----KS Test--------
                Statistic = 0.08
                Accept Hypothesis
                P value = 0.9084105017744525
                {'loc': np.float64(10.031759532159755), 'scale': np.float64(1.819201407871162)}

                ```
            - Generate points for plotting:
                ```python
                >>> x = np.linspace(min(data), max(data), 1000)
                >>> pdf_values = normal_dist.pdf(data=x)

                ```
            - Create a PDF plot:
                ```python
                >>> fig, ax = Plot.pdf(x, pdf_values, data)

                ```
            - Further customize the plot if needed:
                ```python
                >>> ax.set_title("Normal Distribution PDF")
                >>> ax.grid(True)

                ```
                ![PDF Plot Example](./../_images/plot/plot-pdf-2.png)

        See Also:
            - Plot.cdf: For plotting cumulative distribution functions
            - Plot.details: For plotting both PDF and CDF together
        """
        figure = plt.figure(figsize=fig_size)
        axes = figure.add_subplot()

        # Fitted density as a solid line.
        axes.plot(qx, pdf_fitted, "-", color="#27408B", linewidth=2)
        # density=True normalises the histogram so it shares the PDF's scale.
        axes.hist(data_sorted, density=True, histtype="stepfilled", color="#DC143C")

        label_style = {"fontsize": fontsize}
        axes.set_xlabel(xlabel, **label_style)
        axes.set_ylabel(ylabel, **label_style)

        plt.show()
        return figure, axes

    @staticmethod
    def cdf(
        qx: np.ndarray,
        cdf_fitted: np.ndarray,
        data_sorted: np.ndarray,
        cdf_weibul: np.ndarray,
        fig_size: Tuple[float, float] = (6, 5),
        xlabel: str = "Actual data",
        ylabel: str = "cdf",
        fontsize: int = 11,
    ) -> Tuple[Figure, Axes]:
        """Plot a fitted cumulative distribution curve against the empirical CDF.

        The fitted CDF is drawn as a line labelled "Estimated CDF" and the
        empirical CDF as hollow scatter markers labelled "Empirical CDF", both on
        the same axes. The figure is displayed with ``plt.show()`` before it is
        returned.

        Args:
            qx: x-values at which the fitted CDF was evaluated, typically a linspace
                between the min and max of the actual data.
            cdf_fitted: CDF values, one for each point in qx. Usually obtained from
                a distribution's cdf method.
            data_sorted: The sorted actual data points (x-coordinates of the markers).
            cdf_weibul: The empirical CDF values (y-coordinates of the markers),
                typically calculated using the Weibull formula or another plotting
                position formula.
            fig_size: Figure size as (width, height) in inches. Defaults to (6, 5).
            xlabel: Label for the x-axis. Defaults to "Actual data".
            ylabel: Label for the y-axis. Defaults to "cdf".
            fontsize: Font size for the axis labels and the legend. Defaults to 11.

        Returns:
            tuple: A tuple containing:
                - Figure: The matplotlib Figure object
                - Axes: The matplotlib Axes object containing the plot

        Examples:
            - Generate some sample data:
                ```python
                >>> import numpy as np
                >>> from statista.plot import Plot
                >>> from statista.distributions import Normal
                >>> data = np.random.normal(loc=10, scale=2, size=100)
                >>> data_sorted = np.sort(data)

                ```
            - Calculate empirical CDF using Weibull formula:
                ```python
                >>> n = len(data_sorted)
                >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula

                ```
            - Fit a normal distribution:
                ```python
                >>> normal_dist = Normal(data)
                >>> normal_dist.fit_model() # doctest: +SKIP
                -----KS Test--------
                Statistic = 0.08
                Accept Hypothesis
                P value = 0.9084105017744525
                {'loc': np.float64(9.62108385209537), 'scale': np.float64(2.1593427284432147)}

                ```
            - Generate points for plotting:
                ```python
                >>> x = np.linspace(min(data), max(data), 1000)
                >>> cdf_values = normal_dist.cdf(data=x)

                ```
            - Create a CDF plot:
                ```python
                >>> fig, ax = Plot.cdf(x, cdf_values, data_sorted, cdf_empirical)

                ```
            - Further customize the plot if needed:
                ```python
                >>> ax.set_title("Normal Distribution CDF")
                >>> ax.grid(True)

                ```
                ![CDF Plot Example](./../_images/plot/plot-cdf.png)

        See Also:
            - Plot.pdf: For plotting probability density functions
            - Plot.details: For plotting both PDF and CDF together
        """
        figure = plt.figure(figsize=fig_size)
        axes = figure.add_subplot()

        # Fitted curve.
        fitted_style = {"label": "Estimated CDF", "color": "#27408B", "linewidth": 2}
        axes.plot(qx, cdf_fitted, "-", **fitted_style)

        # Empirical plotting positions as hollow markers.
        axes.scatter(
            data_sorted,
            cdf_weibul,
            label="Empirical CDF",
            color="orangered",
            facecolors="none",
        )

        label_style = {"fontsize": fontsize}
        axes.set_xlabel(xlabel, **label_style)
        axes.set_ylabel(ylabel, **label_style)
        # The axes created above are the current axes, so this is the legend
        # pyplot would place as well.
        axes.legend(fontsize=fontsize, framealpha=1)

        plt.show()
        return figure, axes

    @staticmethod
    def details(
        qx: Union[np.ndarray, list],
        q_act: Union[np.ndarray, list],
        pdf: Union[np.ndarray, list],
        cdf_fitted: Union[np.ndarray, list],
        cdf: Union[np.ndarray, list],
        fig_size: Tuple[float, float] = (10, 5),
        xlabel: str = "Actual data",
        ylabel: str = "cdf",
        fontsize: int = 11,
    ) -> Tuple[Figure, Tuple[Axes, Axes]]:
        """Create a detailed distribution plot with both PDF and CDF.

        Generates a side-by-side plot showing both the probability density function (PDF)
        and cumulative distribution function (CDF) for a fitted distribution compared
        with the actual data. This provides a comprehensive view of how well the
        distribution fits the data. The figure is displayed with ``plt.show()``
        before it is returned.

        Args:
            qx: Array of x-values for plotting the fitted curves. Typically generated
                as a linspace between the min and max of the actual data.
            q_act: The actual data points. They do not need to be sorted: a sorted
                copy is used for the empirical CDF markers and the caller's object
                is left unchanged.
            pdf: Array of PDF values corresponding to each point in qx.
                Usually obtained from a distribution's pdf method.
            cdf_fitted: Array of CDF values corresponding to each point in qx.
                Usually obtained from a distribution's cdf method.
            cdf: The empirical CDF values, typically calculated using the Weibull formula
                or another plotting position formula. They are paired with the
                sorted values of q_act.
            fig_size: Figure size as (width, height) in inches. Defaults to (10, 5).
            xlabel: Label for the x-axis of both subplots. Defaults to "Actual data".
            ylabel: Label for the y-axis of the CDF plot. Defaults to "cdf". The PDF
                subplot's y-axis is always labelled "pdf".
            fontsize: Font size for all axis labels. Defaults to 11.

        Returns:
            tuple: A tuple containing:
                - Figure: The matplotlib Figure object
                - tuple: A tuple of two Axes objects (ax1, ax2) where:
                    - ax1: The left subplot containing the PDF
                    - ax2: The right subplot containing the CDF

        Examples:
            - Import necessary libraries:
                ```python
                >>> import numpy as np
                >>> from statista.plot import Plot
                >>> from statista.distributions import Normal

                ```
            - Generate some sample data:
                ```python
                >>> data = np.random.normal(loc=10, scale=2, size=100)
                >>> data_sorted = np.sort(data)

                ```
            - Calculate empirical CDF using Weibull formula:
                ```python
                >>> n = len(data_sorted)
                >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula

                ```
            - Fit a normal distribution:
                ```python
                >>> normal_dist = Normal(data_sorted)
                >>> normal_dist.fit_model() # doctest: +SKIP
                -----KS Test--------
                Statistic = 0.06
                Accept Hypothesis
                P value = 0.9942356257694902
                {'loc': np.float64(10.061702421737607), 'scale': np.float64(1.857026806934038)}

                ```
            - Generate points for plotting:
                ```python
                >>> x = np.linspace(min(data), max(data), 1000)
                >>> pdf_values = normal_dist.pdf(data=x)
                >>> cdf_values = normal_dist.cdf(data=x)

                ```
            - Create a detailed plot with both PDF and CDF:
                ```python
                >>> fig, (ax1, ax2) = Plot.details(x, data, pdf_values, cdf_values, cdf_empirical)

                ```
            - Further customize the plots if needed:
                ```python
                >>> ax1.set_title("PDF Comparison")
                >>> ax2.set_title("CDF Comparison")
                >>> fig.suptitle("Normal Distribution Fit", fontsize=14)
                >>> ax1.grid(True)
                >>> ax2.grid(True)

                ```
                ![Details Plot Example](./../_images/plot/plot-detailed.png)

        See Also:
            - Plot.pdf: For plotting only the probability density function
            - Plot.cdf: For plotting only the cumulative distribution function
        """
        fig = plt.figure(figsize=fig_size)
        gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)

        # Left panel: fitted PDF over a density-normalised histogram of the data.
        ax1 = fig.add_subplot(gs[0, 0])
        ax1.plot(qx, pdf, "-", color="#27408B", linewidth=2)
        ax1.hist(q_act, density=True, histtype="stepfilled", color="#DC143C")
        ax1.set_xlabel(xlabel, fontsize=fontsize)
        ax1.set_ylabel("pdf", fontsize=fontsize)

        # Right panel: fitted CDF against the empirical plotting positions.
        ax2 = fig.add_subplot(gs[0, 1])
        ax2.plot(qx, cdf_fitted, "-", color="#27408B", linewidth=2)

        # np.sort returns a sorted copy, so the caller's data keeps its order
        # (an in-place q_act.sort() would silently reorder it).
        q_sorted = np.sort(q_act)
        ax2.scatter(q_sorted, cdf, color="#DC143C", facecolors="none")
        ax2.set_xlabel(xlabel, fontsize=fontsize)
        # Honour the fontsize argument like every other label in this figure.
        ax2.set_ylabel(ylabel, fontsize=fontsize)
        plt.show()
        return fig, (ax1, ax2)

    @staticmethod
    def confidence_level(
        qth: Union[np.ndarray, list],
        q_act: Union[np.ndarray, list],
        q_lower: Union[np.ndarray, list],
        q_upper: Union[np.ndarray, list],
        fig_size: Tuple[float, float] = (6, 6),
        fontsize: int = 11,
        alpha: Union[Number, None] = None,
        marker_size: int = 10,
    ) -> Tuple[Figure, Axes]:
        """Create a confidence interval plot for distribution quantiles.

        Generates a plot showing the theoretical quantiles, actual data points, and
        confidence interval bounds. This is useful for assessing how well a distribution
        fits the data and visualizing the uncertainty in the fit. The figure is
        displayed with ``plt.show()`` before it is returned.

        Args:
            qth: Theoretical quantiles (obtained using the inverse_cdf method).
                These values represent what the distribution predicts for each quantile.
            q_act: Actual data points. A sorted copy is plotted against the
                theoretical quantiles; the caller's object is left unchanged.
            q_lower: Lower limit of the confidence interval for each theoretical quantile.
                Usually calculated based on the distribution parameters and a significance level.
            q_upper: Upper limit of the confidence interval for each theoretical quantile.
                Usually calculated based on the distribution parameters and a significance level.
            fig_size: Figure size as (width, height) in inches. Defaults to (6, 6).
            fontsize: Font size for labels and legend. Defaults to 11.
            alpha: Significance level used for the confidence intervals (e.g., 0.05 for 95% CI).
                Used only for labeling the legend; the actual intervals must be pre-calculated.
                When None (the default) the legend entries read "Lower limit" and
                "Upper limit" without a percentage.
            marker_size: Size of the markers for the upper and lower bounds. Defaults to 10.

        Returns:
            tuple: A tuple containing:
                - Figure: The matplotlib Figure object
                - Axes: The matplotlib Axes object containing the plot

        Examples:
            - Import necessary libraries:
                ```python
                >>> import numpy as np
                >>> from statista.plot import Plot
                >>> from statista.distributions import Normal

                ```
            - Generate some sample data:
                ```python
                >>> data = np.random.normal(loc=10, scale=2, size=100)

                ```
            - Fit a normal distribution:
                ```python
                >>> normal_dist = Normal(data)
                >>> normal_dist.fit_model() # doctest: +SKIP
                -----KS Test--------
                Statistic = 0.07
                Accept Hypothesis
                P value = 0.9684099261397212
                {'loc': np.float64(10.51674893337459), 'scale': np.float64(2.002961856532672)}

                ```
            - Generate theoretical quantiles:
                ```python
                >>> p = np.linspace(0.01, 0.99, 100)  # Probability points
                >>> theoretical_quantiles = normal_dist.inverse_cdf(p)

                ```
            - Calculate confidence intervals (simplified example; in practice these
              would be calculated based on the distribution parameters):
                ```python
                >>> std_error = 0.5  # Example standard error
                >>> z_value = 1.96  # For 95% confidence interval
                >>> lower_ci = theoretical_quantiles - z_value * std_error
                >>> upper_ci = theoretical_quantiles + z_value * std_error

                ```
            - Create the confidence interval plot:
                ```python
                >>> fig, ax = Plot.confidence_level(
                ...     theoretical_quantiles, data, lower_ci, upper_ci, alpha=0.05
                ... )

                ```
            - Further customize the plot if needed:
                ```python
                >>> ax.set_title("Normal Distribution Quantile Plot with 95% CI")
                >>> ax.grid(True)

                ```
                ![Confidence Level Plot Example](./../_images/plot/plot-confidence-level.png)

        Notes:
            The function plots a sorted copy of the actual data points (q_act); the
            input itself is not modified.

            The 1:1 line represents perfect agreement between theoretical and actual values.
            Points falling along this line indicate a good fit of the distribution to the data.

            Points falling outside the confidence intervals suggest potential issues with
            the distribution fit at those quantiles.

        See Also:
            - Plot.details: For plotting PDF and CDF together
        """
        # Sort a copy so the caller's data keeps its original order.
        q_sorted = np.sort(q_act)

        # alpha only feeds the legend text. Without it the percentage is simply
        # omitted instead of failing on `1 - None`. The "g" format renders
        # 94.99999999999999 as "95" and keeps fractional levels such as 97.5.
        if alpha is None:
            ci_suffix = ""
        else:
            ci_suffix = f" ({float((1 - alpha) * 100):g} % CI)"

        fig = plt.figure(figsize=fig_size)
        ax = fig.add_subplot()
        # 1:1 reference line: theoretical quantiles against themselves.
        ax.plot(qth, qth, "-.", color="#3D59AB", linewidth=2, label="Theoretical Data")
        # Confidence interval bounds, drawn in the same style.
        for bound, side in ((q_lower, "Lower"), (q_upper, "Upper")):
            ax.plot(
                qth,
                bound,
                "*--",
                color="grey",
                markersize=marker_size,
                label=f"{side} limit{ci_suffix}",
            )
        # zorder keeps the observations on top of the lines.
        ax.scatter(
            qth,
            q_sorted,
            color="#DC143C",
            facecolors="none",
            label="Actual Data",
            zorder=10,
        )
        ax.legend(fontsize=fontsize, framealpha=1)
        ax.set_xlabel("Theoretical Values", fontsize=fontsize)
        ax.set_ylabel("Actual Values", fontsize=fontsize)
        plt.show()
        return fig, ax

pdf(qx, pdf_fitted, data_sorted, fig_size=(6, 5), xlabel='Actual data', ylabel='pdf', fontsize=11) staticmethod #

Create a probability density function (PDF) plot.

Generates a plot showing both the fitted probability density function curve and a histogram of the actual data for visual comparison.

Parameters:

Name Type Description Default
qx ndarray

Array of x-values for plotting the fitted PDF curve. Typically generated as a linspace between the min and max of the actual data.

required
pdf_fitted

Array of PDF values corresponding to each point in qx. Usually obtained from a distribution's pdf method.

required
data_sorted ndarray

The actual data to be plotted as a histogram.

required
fig_size Tuple[float, float]

Figure size as (width, height) in inches. Defaults to (6, 5).

(6, 5)
xlabel str

Label for the x-axis. Defaults to "Actual data".

'Actual data'
ylabel str

Label for the y-axis. Defaults to "pdf".

'pdf'
fontsize int

Font size for labels. Defaults to 11.

11

Returns:

Name Type Description
tuple Tuple[Figure, Axes]

A tuple containing the matplotlib Figure object and the matplotlib Axes object that holds the plot.

Examples:

  • Generate some sample data:
    >>> import numpy as np
    >>> from statista.plot import Plot
    >>> from statista.distributions import Normal
    >>> data = np.random.normal(loc=10, scale=2, size=100)
    
  • Fit a normal distribution:
    >>> normal_dist = Normal(data)
    >>> normal_dist.fit_model() # doctest: +SKIP
    -----KS Test--------
    Statistic = 0.08
    Accept Hypothesis
    P value = 0.9084105017744525
    {'loc': np.float64(10.031759532159755), 'scale': np.float64(1.819201407871162)}
    
  • Generate points for plotting
    >>> x = np.linspace(min(data), max(data), 1000)
    >>> pdf_values = normal_dist.pdf(data=x)
    
  • Create a PDF plot:

    >>> fig, ax = Plot.pdf(x, pdf_values, data)
    

  • Further customize the plot if needed
    >>> ax.set_title("Normal Distribution PDF")
    >>> ax.grid(True)
    
    PDF Plot Example

See Also
  • Plot.cdf: For plotting cumulative distribution functions
  • Plot.details: For plotting both PDF and CDF together
Source code in statista/plot.py
@staticmethod
def pdf(
    qx: np.ndarray,
    pdf_fitted,
    data_sorted: np.ndarray,
    fig_size: Tuple[float, float] = (6, 5),
    xlabel: str = "Actual data",
    ylabel: str = "pdf",
    fontsize: int = 11,
) -> Tuple[Figure, Axes]:
    """Plot a fitted probability density curve over a histogram of the data.

    The fitted PDF is drawn as a line and the observations as a
    density-normalised histogram on the same axes, so the two can be compared
    visually. The figure is displayed with ``plt.show()`` before it is returned.

    Args:
        qx: x-values at which the fitted PDF was evaluated, typically a linspace
            between the min and max of the actual data.
        pdf_fitted: PDF values, one for each point in qx. Usually obtained from
            a distribution's pdf method.
        data_sorted: The actual data drawn as the histogram.
        fig_size: Figure size as (width, height) in inches. Defaults to (6, 5).
        xlabel: Label for the x-axis. Defaults to "Actual data".
        ylabel: Label for the y-axis. Defaults to "pdf".
        fontsize: Font size used for both axis labels. Defaults to 11.

    Returns:
        tuple: A tuple containing:
            - Figure: The matplotlib Figure object
            - Axes: The matplotlib Axes object containing the plot

    Examples:
        - Generate some sample data:
            ```python
            >>> import numpy as np
            >>> from statista.plot import Plot
            >>> from statista.distributions import Normal
            >>> data = np.random.normal(loc=10, scale=2, size=100)

            ```
        - Fit a normal distribution:
            ```python
            >>> normal_dist = Normal(data)
            >>> normal_dist.fit_model() # doctest: +SKIP
            -----KS Test--------
            Statistic = 0.08
            Accept Hypothesis
            P value = 0.9084105017744525
            {'loc': np.float64(10.031759532159755), 'scale': np.float64(1.819201407871162)}

            ```
        - Generate points for plotting:
            ```python
            >>> x = np.linspace(min(data), max(data), 1000)
            >>> pdf_values = normal_dist.pdf(data=x)

            ```
        - Create a PDF plot:
            ```python
            >>> fig, ax = Plot.pdf(x, pdf_values, data)

            ```
        - Further customize the plot if needed:
            ```python
            >>> ax.set_title("Normal Distribution PDF")
            >>> ax.grid(True)

            ```
            ![PDF Plot Example](./../_images/plot/plot-pdf-2.png)

    See Also:
        - Plot.cdf: For plotting cumulative distribution functions
        - Plot.details: For plotting both PDF and CDF together
    """
    figure = plt.figure(figsize=fig_size)
    axes = figure.add_subplot()

    # Fitted density as a solid line.
    axes.plot(qx, pdf_fitted, "-", color="#27408B", linewidth=2)
    # density=True normalises the histogram so it shares the PDF's scale.
    axes.hist(data_sorted, density=True, histtype="stepfilled", color="#DC143C")

    label_style = {"fontsize": fontsize}
    axes.set_xlabel(xlabel, **label_style)
    axes.set_ylabel(ylabel, **label_style)

    plt.show()
    return figure, axes

cdf(qx, cdf_fitted, data_sorted, cdf_weibul, fig_size=(6, 5), xlabel='Actual data', ylabel='cdf', fontsize=11) staticmethod #

Create a cumulative distribution function (CDF) plot.

Generates a plot showing both the fitted cumulative distribution function curve and the empirical CDF points from the actual data for visual comparison.

Parameters:

Name Type Description Default
qx ndarray

Array of x-values for plotting the fitted CDF curve. Typically generated as a linspace between the min and max of the actual data.

required
cdf_fitted ndarray

Array of CDF values corresponding to each point in qx. Usually obtained from a distribution's cdf method.

required
data_sorted ndarray

The sorted actual data points.

required
cdf_weibul ndarray

The empirical CDF values, typically calculated using the Weibull formula or another plotting position formula.

required
fig_size Tuple[float, float]

Figure size as (width, height) in inches. Defaults to (6, 5).

(6, 5)
xlabel str

Label for the x-axis. Defaults to "Actual data".

'Actual data'
ylabel str

Label for the y-axis. Defaults to "cdf".

'cdf'
fontsize int

Font size for labels and legend. Defaults to 11.

11

Returns:

Name Type Description
tuple Tuple[Figure, Axes]

A tuple containing the matplotlib Figure object and the matplotlib Axes object that holds the plot.

Examples:

  • Generate some sample data:
    >>> import numpy as np
    >>> from statista.plot import Plot
    >>> from statista.distributions import Normal
    >>> data = np.random.normal(loc=10, scale=2, size=100)
    >>> data_sorted = np.sort(data)
    
  • Calculate empirical CDF using Weibull formula:
    >>> n = len(data_sorted)
    >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula
    
  • Fit a normal distribution:
    >>> normal_dist = Normal(data)
    >>> normal_dist.fit_model() # doctest: +SKIP
    -----KS Test--------
    Statistic = 0.08
    Accept Hypothesis
    P value = 0.9084105017744525
    {'loc': np.float64(9.62108385209537), 'scale': np.float64(2.1593427284432147)}
    
  • Generate points for plotting:
    >>> x = np.linspace(min(data), max(data), 1000)
    >>> cdf_values = normal_dist.cdf(data=x)
    
  • Create a CDF plot
    >>> fig, ax = Plot.cdf(x, cdf_values, data_sorted, cdf_empirical)
    
  • Further customize the plot if needed
    >>> ax.set_title("Normal Distribution CDF")
    >>> ax.grid(True)
    
    CDF Plot Example
See Also
  • Plot.pdf: For plotting probability density functions
  • Plot.details: For plotting both PDF and CDF together
Source code in statista/plot.py
@staticmethod
def cdf(
    qx: np.ndarray,
    cdf_fitted: np.ndarray,
    data_sorted: np.ndarray,
    cdf_weibul: np.ndarray,
    fig_size: Tuple[float, float] = (6, 5),
    xlabel: str = "Actual data",
    ylabel: str = "cdf",
    fontsize: int = 11,
) -> Tuple[Figure, Axes]:
    """Plot a fitted CDF curve against the empirical CDF of the data.

    The fitted cumulative distribution function is drawn as a solid line and the
    empirical CDF is overlaid as unfilled scatter markers, so the quality of the
    fit can be judged visually. The figure is displayed with ``plt.show()``
    before the function returns.

    Args:
        qx: x-values at which the fitted CDF is evaluated. Typically a linspace
            between the min and max of the actual data.
        cdf_fitted: Fitted CDF value for every point in ``qx``. Usually obtained
            from a distribution's cdf method.
        data_sorted: The actual data points, sorted.
        cdf_weibul: Empirical CDF values plotted against ``data_sorted``,
            typically calculated using the Weibull formula or another plotting
            position formula.
        fig_size: Figure size as (width, height) in inches. Defaults to (6, 5).
        xlabel: Label for the x-axis. Defaults to "Actual data".
        ylabel: Label for the y-axis. Defaults to "cdf".
        fontsize: Font size for the axis labels and the legend. Defaults to 11.

    Returns:
        tuple: A ``(Figure, Axes)`` pair:
            - Figure: The matplotlib Figure object
            - Axes: The matplotlib Axes object containing the plot

    Examples:
        - Generate some sample data:
            ```python
            >>> import numpy as np
            >>> from statista.plot import Plot
            >>> from statista.distributions import Normal
            >>> data = np.random.normal(loc=10, scale=2, size=100)
            >>> data_sorted = np.sort(data)

            ```
        - Calculate empirical CDF using Weibull formula:
            ```python
            >>> n = len(data_sorted)
            >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula

            ```
        - Fit a normal distribution:
            ```python
            >>> normal_dist = Normal(data)
            >>> normal_dist.fit_model() # doctest: +SKIP
            -----KS Test--------
            Statistic = 0.08
            Accept Hypothesis
            P value = 0.9084105017744525
            {'loc': np.float64(9.62108385209537), 'scale': np.float64(2.1593427284432147)}

            ```
        - Generate points for plotting:
            ```python
            >>> x = np.linspace(min(data), max(data), 1000)
            >>> cdf_values = normal_dist.cdf(data=x)

            ```
        - Create a CDF plot:
            ```python
            >>> fig, ax = Plot.cdf(x, cdf_values, data_sorted, cdf_empirical)

            ```
        - Further customize the plot if needed:
            ```python
            >>> ax.set_title("Normal Distribution CDF")
            >>> ax.grid(True)

            ```
            ![CDF Plot Example](./../_images/plot/plot-cdf.png)

    See Also:
        - Plot.pdf: For plotting probability density functions
        - Plot.details: For plotting both PDF and CDF together
    """
    # Visual styling of the two series, kept apart from the drawing calls.
    fitted_style = {"label": "Estimated CDF", "color": "#27408B", "linewidth": 2}
    empirical_style = {
        "label": "Empirical CDF",
        "color": "orangered",
        "facecolors": "none",
    }

    figure = plt.figure(figsize=fig_size)
    axes = figure.add_subplot()

    # Fitted distribution as a continuous line, observations as hollow markers.
    axes.plot(qx, cdf_fitted, "-", **fitted_style)
    axes.scatter(data_sorted, cdf_weibul, **empirical_style)

    for set_label, text in ((axes.set_xlabel, xlabel), (axes.set_ylabel, ylabel)):
        set_label(text, fontsize=fontsize)

    plt.legend(fontsize=fontsize, framealpha=1)
    plt.show()
    return figure, axes

details(qx, q_act, pdf, cdf_fitted, cdf, fig_size=(10, 5), xlabel='Actual data', ylabel='cdf', fontsize=11) staticmethod #

Create a detailed distribution plot with both PDF and CDF.

Generates a side-by-side plot showing both the probability density function (PDF) and cumulative distribution function (CDF) for a fitted distribution compared with the actual data. This provides a comprehensive view of how well the distribution fits the data.

Parameters:

Name Type Description Default
qx Union[ndarray, list]

Array of x-values for plotting the fitted curves. Typically generated as a linspace between the min and max of the actual data.

required
q_act Union[ndarray, list]

The actual data points.

required
pdf Union[ndarray, list]

Array of PDF values corresponding to each point in qx. Usually obtained from a distribution's pdf method.

required
cdf_fitted Union[ndarray, list]

Array of CDF values corresponding to each point in qx. Usually obtained from a distribution's cdf method.

required
cdf Union[ndarray, list]

The empirical CDF values, typically calculated using the Weibull formula or another plotting position formula.

required
fig_size Tuple[float, float]

Figure size as (width, height) in inches. Defaults to (10, 5).

(10, 5)
xlabel str

Label for the x-axis. Defaults to "Actual data".

'Actual data'
ylabel str

Label for the y-axis of the CDF plot. Defaults to "cdf".

'cdf'
fontsize int

Font size for labels. Defaults to 11.

11

Returns:

Name Type Description
tuple Tuple[Figure, Tuple[Axes, Axes]]

A tuple (Figure, (ax1, ax2)): the matplotlib Figure object and a tuple of two Axes objects, where ax1 is the left subplot containing the PDF and ax2 is the right subplot containing the CDF.

Examples:

  • Import necessary libraries:
    >>> import numpy as np
    >>> from statista.plot import Plot
    >>> from statista.distributions import Normal
    
  • Generate some sample data:
    >>> data = np.random.normal(loc=10, scale=2, size=100)
    >>> data_sorted = np.sort(data)
    
  • Calculate empirical CDF using Weibull formula:
    >>> n = len(data_sorted)
    >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula
    
  • Fit a normal distribution:
    >>> normal_dist = Normal(data_sorted)
    >>> normal_dist.fit_model() # doctest: +SKIP
    -----KS Test--------
    Statistic = 0.06
    Accept Hypothesis
    P value = 0.9942356257694902
    {'loc': np.float64(10.061702421737607), 'scale': np.float64(1.857026806934038)}
    
  • Generate points for plotting:
    >>> x = np.linspace(min(data), max(data), 1000)
    >>> pdf_values = normal_dist.pdf(data=x)
    >>> cdf_values = normal_dist.cdf(data=x)
    
  • Create a detailed plot with both PDF and CDF:
    >>> fig, (ax1, ax2) = Plot.details(x, data, pdf_values, cdf_values, cdf_empirical)
    
  • Further customize the plots if needed:
    >>> ax1.set_title("PDF Comparison")
    >>> ax2.set_title("CDF Comparison")
    >>> fig.suptitle("Normal Distribution Fit", fontsize=14)
    >>> ax1.grid(True)
    >>> ax2.grid(True)
    
    Details Plot Example
See Also
  • Plot.pdf: For plotting only the probability density function
  • Plot.cdf: For plotting only the cumulative distribution function
Source code in statista/plot.py
@staticmethod
def details(
    qx: Union[np.ndarray, list],
    q_act: Union[np.ndarray, list],
    pdf: Union[np.ndarray, list],
    cdf_fitted: Union[np.ndarray, list],
    cdf: Union[np.ndarray, list],
    fig_size: Tuple[float, float] = (10, 5),
    xlabel: str = "Actual data",
    ylabel: str = "cdf",
    fontsize: int = 11,
) -> Tuple[Figure, Tuple[Axes, Axes]]:
    """Create a detailed distribution plot with both PDF and CDF.

    Generates a side-by-side plot showing both the probability density function (PDF)
    and cumulative distribution function (CDF) for a fitted distribution compared
    with the actual data. This provides a comprehensive view of how well the
    distribution fits the data. The figure is displayed with ``plt.show()``
    before the function returns.

    Args:
        qx: Array of x-values for plotting the fitted curves. Typically generated
            as a linspace between the min and max of the actual data.
        q_act: The actual data points. They do not need to be sorted; a sorted
            copy is used for the empirical CDF and the input itself is left
            unmodified.
        pdf: Array of PDF values corresponding to each point in qx.
            Usually obtained from a distribution's pdf method.
        cdf_fitted: Array of CDF values corresponding to each point in qx.
            Usually obtained from a distribution's cdf method.
        cdf: The empirical CDF values, typically calculated using the Weibull formula
            or another plotting position formula. They are plotted against the
            sorted actual data.
        fig_size: Figure size as (width, height) in inches. Defaults to (10, 5).
        xlabel: Label for the x-axis of both subplots. Defaults to "Actual data".
        ylabel: Label for the y-axis of the CDF plot. Defaults to "cdf".
        fontsize: Font size for all axis labels. Defaults to 11.

    Returns:
        tuple: A tuple containing:
            - Figure: The matplotlib Figure object
            - tuple: A tuple of two Axes objects (ax1, ax2) where:
                - ax1: The left subplot containing the PDF
                - ax2: The right subplot containing the CDF

    Examples:
        - Import necessary libraries:
            ```python
            >>> import numpy as np
            >>> from statista.plot import Plot
            >>> from statista.distributions import Normal

            ```
        - Generate some sample data:
            ```python
            >>> data = np.random.normal(loc=10, scale=2, size=100)
            >>> data_sorted = np.sort(data)

            ```
        - Calculate empirical CDF using Weibull formula:
            ```python
            >>> n = len(data_sorted)
            >>> cdf_empirical = np.arange(1, n + 1) / (n + 1)  # Weibull formula

            ```
        - Fit a normal distribution:
            ```python
            >>> normal_dist = Normal(data_sorted)
            >>> normal_dist.fit_model() # doctest: +SKIP
            -----KS Test--------
            Statistic = 0.06
            Accept Hypothesis
            P value = 0.9942356257694902
            {'loc': np.float64(10.061702421737607), 'scale': np.float64(1.857026806934038)}

            ```
        - Generate points for plotting:
            ```python
            >>> x = np.linspace(min(data), max(data), 1000)
            >>> pdf_values = normal_dist.pdf(data=x)
            >>> cdf_values = normal_dist.cdf(data=x)

            ```
        - Create a detailed plot with both PDF and CDF:
            ```python
            >>> fig, (ax1, ax2) = Plot.details(x, data, pdf_values, cdf_values, cdf_empirical)

            ```
        - Further customize the plots if needed:
            ```python
            >>> ax1.set_title("PDF Comparison")
            >>> ax2.set_title("CDF Comparison")
            >>> fig.suptitle("Normal Distribution Fit", fontsize=14)
            >>> ax1.grid(True)
            >>> ax2.grid(True)

            ```
            ![Details Plot Example](./../_images/plot/plot-detailed.png)

    See Also:
        - Plot.pdf: For plotting only the probability density function
        - Plot.cdf: For plotting only the cumulative distribution function
    """
    fig = plt.figure(figsize=fig_size)
    gs = gridspec.GridSpec(nrows=1, ncols=2, figure=fig)

    # Left panel: fitted PDF curve together with a density-normalized histogram
    # of the observations.
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.plot(qx, pdf, "-", color="#27408B", linewidth=2)
    ax1.hist(q_act, density=True, histtype="stepfilled", color="#DC143C")
    ax1.set_xlabel(xlabel, fontsize=fontsize)
    ax1.set_ylabel("pdf", fontsize=fontsize)

    # Right panel: fitted CDF curve together with the empirical CDF points.
    ax2 = fig.add_subplot(gs[0, 1])
    ax2.plot(qx, cdf_fitted, "-", color="#27408B", linewidth=2)

    # Sort a copy instead of calling q_act.sort(): the caller's data keeps its
    # original order, and inputs without an in-place sort are accepted too.
    q_sorted = np.sort(q_act)
    ax2.scatter(q_sorted, cdf, color="#DC143C", facecolors="none")
    ax2.set_xlabel(xlabel, fontsize=fontsize)
    # Honour the fontsize argument (this label used a hard-coded size before).
    ax2.set_ylabel(ylabel, fontsize=fontsize)
    plt.show()
    return fig, (ax1, ax2)

confidence_level(qth, q_act, q_lower, q_upper, fig_size=(6, 6), fontsize=11, alpha=None, marker_size=10) staticmethod #

Create a confidence interval plot for distribution quantiles.

Generates a plot showing the theoretical quantiles, actual data points, and confidence interval bounds. This is useful for assessing how well a distribution fits the data and visualizing the uncertainty in the fit.

Parameters:

Name Type Description Default
qth Union[ndarray, list]

Theoretical quantiles (obtained using the inverse_cdf method). These values represent what the distribution predicts for each quantile.

required
q_act Union[ndarray, list]

Actual data points, which will be sorted within the function. These are compared against the theoretical quantiles.

required
q_lower Union[ndarray, list]

Lower limit of the confidence interval for each theoretical quantile. Usually calculated based on the distribution parameters and a significance level.

required
q_upper Union[ndarray, list]

Upper limit of the confidence interval for each theoretical quantile. Usually calculated based on the distribution parameters and a significance level.

required
fig_size Tuple[float, float]

Figure size as (width, height) in inches. Defaults to (6, 6).

(6, 6)
fontsize int

Font size for labels and legend. Defaults to 11.

11
alpha Number

Significance level used for the confidence intervals (e.g., 0.05 for 95% CI). Used only for labeling the legend; the actual intervals must be pre-calculated.

None
marker_size int

Size of the markers for the upper and lower bounds. Defaults to 10.

10

Returns:

Name Type Description
tuple Tuple[Figure, Axes]

A tuple (Figure, Axes): the matplotlib Figure object and the matplotlib Axes object containing the plot.

Examples:

  • Import necessary libraries:
    >>> import numpy as np
    >>> from statista.plot import Plot
    >>> from statista.distributions import Normal
    
  • Generate some sample data:
    >>> data = np.random.normal(loc=10, scale=2, size=100)
    
  • Fit a normal distribution:
    >>> normal_dist = Normal(data)
    >>> normal_dist.fit_model() # doctest: +SKIP
    -----KS Test--------
    Statistic = 0.07
    Accept Hypothesis
    P value = 0.9684099261397212
    {'loc': np.float64(10.51674893337459), 'scale': np.float64(2.002961856532672)}
    
  • Generate theoretical quantiles:
    >>> p = np.linspace(0.01, 0.99, 100)  # Probability points
    >>> theoretical_quantiles = normal_dist.inverse_cdf(p)
    
  • Calculate confidence intervals (simplified example; in practice, these would be calculated based on the distribution parameters):
    >>> std_error = 0.5  # Example standard error
    >>> z_value = 1.96  # For 95% confidence interval
    >>> lower_ci = theoretical_quantiles - z_value * std_error
    >>> upper_ci = theoretical_quantiles + z_value * std_error
    
  • Create the confidence interval plot:
    >>> fig, ax = Plot.confidence_level(
    ...     theoretical_quantiles, data, lower_ci, upper_ci, alpha=0.05
    ... )
    
  • Further customize the plot if needed
    >>> ax.set_title("Normal Distribution Quantile Plot with 95% CI")
    >>> ax.grid(True)
    
    Confidence Level Plot Example
Notes

The function automatically sorts the actual data points (q_act) before plotting.

The 1:1 line represents perfect agreement between theoretical and actual values. Points falling along this line indicate a good fit of the distribution to the data.

Points falling outside the confidence intervals suggest potential issues with the distribution fit at those quantiles.

See Also
  • Plot.details: For plotting PDF and CDF together
Source code in statista/plot.py
@staticmethod
def confidence_level(
    qth: Union[np.ndarray, list],
    q_act: Union[np.ndarray, list],
    q_lower: Union[np.ndarray, list],
    q_upper: Union[np.ndarray, list],
    fig_size: Tuple[float, float] = (6, 6),
    fontsize: int = 11,
    alpha: Union[Number, None] = None,
    marker_size: int = 10,
) -> Tuple[Figure, Axes]:
    """Create a confidence interval plot for distribution quantiles.

    Generates a plot showing the theoretical quantiles, actual data points, and
    confidence interval bounds. This is useful for assessing how well a distribution
    fits the data and visualizing the uncertainty in the fit. The figure is
    displayed with ``plt.show()`` before the function returns.

    Args:
        qth: Theoretical quantiles (obtained using the inverse_cdf method).
            These values represent what the distribution predicts for each quantile.
        q_act: Actual data points. A sorted copy is plotted against the
            theoretical quantiles; the input itself is left unmodified.
        q_lower: Lower limit of the confidence interval for each theoretical quantile.
            Usually calculated based on the distribution parameters and a significance level.
        q_upper: Upper limit of the confidence interval for each theoretical quantile.
            Usually calculated based on the distribution parameters and a significance level.
        fig_size: Figure size as (width, height) in inches. Defaults to (6, 6).
        fontsize: Font size for labels and legend. Defaults to 11.
        alpha: Significance level used for the confidence intervals (e.g., 0.05 for 95% CI).
            Used only for labeling the legend; the actual intervals must be pre-calculated.
            When None (the default), the legend entries for the limits omit the
            confidence percentage.
        marker_size: Size of the markers for the upper and lower bounds. Defaults to 10.

    Returns:
        tuple: A tuple containing:
            - Figure: The matplotlib Figure object
            - Axes: The matplotlib Axes object containing the plot

    Examples:
        - Import necessary libraries:
            ```python
            >>> import numpy as np
            >>> from statista.plot import Plot
            >>> from statista.distributions import Normal

            ```
        - Generate some sample data:
            ```python
            >>> data = np.random.normal(loc=10, scale=2, size=100)

            ```
        - Fit a normal distribution:
            ```python
            >>> normal_dist = Normal(data)
            >>> normal_dist.fit_model() # doctest: +SKIP
            -----KS Test--------
            Statistic = 0.07
            Accept Hypothesis
            P value = 0.9684099261397212
            {'loc': np.float64(10.51674893337459), 'scale': np.float64(2.002961856532672)}

            ```
        - Generate theoretical quantiles:
            ```python
            >>> p = np.linspace(0.01, 0.99, 100)  # Probability points
            >>> theoretical_quantiles = normal_dist.inverse_cdf(p)

            ```
        - Calculate confidence intervals (simplified example; in practice, these
          would be calculated based on the distribution parameters):
            ```python
            >>> std_error = 0.5  # Example standard error
            >>> z_value = 1.96  # For 95% confidence interval
            >>> lower_ci = theoretical_quantiles - z_value * std_error
            >>> upper_ci = theoretical_quantiles + z_value * std_error

            ```
        - Create the confidence interval plot:
            ```python
            >>> fig, ax = Plot.confidence_level(
            ...     theoretical_quantiles, data, lower_ci, upper_ci, alpha=0.05
            ... )

            ```
        - Further customize the plot if needed:
            ```python
            >>> ax.set_title("Normal Distribution Quantile Plot with 95% CI")
            >>> ax.grid(True)

            ```
            ![Confidence Level Plot Example](./../_images/plot/plot-confidence-level.png)

    Notes:
        The function automatically sorts the actual data points (q_act) before plotting.

        The 1:1 line represents perfect agreement between theoretical and actual values.
        Points falling along this line indicate a good fit of the distribution to the data.

        Points falling outside the confidence intervals suggest potential issues with
        the distribution fit at those quantiles.

    See Also:
        - Plot.details: For plotting PDF and CDF together
    """
    # Sort a copy instead of calling q_act.sort(): the caller's data keeps its
    # original order, and inputs without an in-place sort are accepted too.
    q_sorted = np.sort(q_act)

    # alpha defaults to None, so the percentage can only be computed when a
    # level was actually given. round() avoids truncating float noise
    # (e.g. (1 - 0.43) * 100 == 56.99999999999999 must be labelled 57, not 56).
    if alpha is None:
        ci_suffix = ""
    else:
        ci_suffix = f" ({int(round((1 - alpha) * 100))} % CI)"

    fig = plt.figure(figsize=fig_size)
    ax = fig.add_subplot()
    # 1:1 reference line: theoretical quantiles plotted against themselves.
    ax.plot(qth, qth, "-.", color="#3D59AB", linewidth=2, label="Theoretical Data")
    # confidence interval
    ax.plot(
        qth,
        q_lower,
        "*--",
        color="grey",
        markersize=marker_size,
        label=f"Lower limit{ci_suffix}",
    )
    ax.plot(
        qth,
        q_upper,
        "*--",
        color="grey",
        markersize=marker_size,
        label=f"Upper limit{ci_suffix}",
    )
    # zorder=10 is passed so the observations are layered above the lines.
    ax.scatter(
        qth,
        q_sorted,
        color="#DC143C",
        facecolors="none",
        label="Actual Data",
        zorder=10,
    )
    ax.legend(fontsize=fontsize, framealpha=1)
    ax.set_xlabel("Theoretical Values", fontsize=fontsize)
    ax.set_ylabel("Actual Values", fontsize=fontsize)
    plt.show()
    return fig, ax