Branch data Line data Source code
1 : : /*************************************************************************** 2 : : qgsstatisticalsummary.h 3 : : -------------------------------------- 4 : : Date : May 2015 5 : : Copyright : (C) 2015 by Nyall Dawson 6 : : Email : nyall dot dawson at gmail dot com 7 : : *************************************************************************** 8 : : * * 9 : : * This program is free software; you can redistribute it and/or modify * 10 : : * it under the terms of the GNU General Public License as published by * 11 : : * the Free Software Foundation; either version 2 of the License, or * 12 : : * (at your option) any later version. * 13 : : * * 14 : : ***************************************************************************/ 15 : : 16 : : #ifndef QGSSTATISTICALSUMMARY_H 17 : : #define QGSSTATISTICALSUMMARY_H 18 : : 19 : : #include <QMap> 20 : : #include <QVariant> 21 : : #include <cmath> 22 : : #include "qgis_core.h" 23 : : 24 : : /*************************************************************************** 25 : : * This class is considered CRITICAL and any change MUST be accompanied with 26 : : * full unit tests in testqgsstatisticalsummary.cpp. 27 : : * See details in QEP #17 28 : : ****************************************************************************/ 29 : : 30 : : /** 31 : : * \ingroup core 32 : : * \class QgsStatisticalSummary 33 : : * \brief Calculator for summary statistics for a list of doubles. 34 : : * 35 : : * Statistics are calculated by calling calculate() and passing a list of doubles. The 36 : : * individual statistics can then be retrieved using the associated methods. Note that not all statistics 37 : : * are calculated by default. Statistics which require slower computations are only calculated by 38 : : * specifying the statistic in the constructor or via setStatistics(). 39 : : * 40 : : * \since QGIS 2.9 41 : : */ 42 : : 43 : 0 : class CORE_EXPORT QgsStatisticalSummary 44 : : { 45 : : public: 46 : : 47 : : //! Enumeration of flags that specify statistics to be calculated 48 : : enum Statistic 49 : : { 50 : : Count = 1 << 0, //!< Count 51 : : CountMissing = 1 << 15, //!< Number of missing (null) values 52 : : Sum = 1 << 1, //!< Sum of values 53 : : Mean = 1 << 2, //!< Mean of values 54 : : Median = 1 << 3, //!< Median of values 55 : : StDev = 1 << 4, //!< Standard deviation of values 56 : : StDevSample = 1 << 5, //!< Sample standard deviation of values 57 : : Min = 1 << 6, //!< Min of values 58 : : Max = 1 << 7, //!< Max of values 59 : : Range = 1 << 8, //!< Range of values (max - min) 60 : : Minority = 1 << 9, //!< Minority of values 61 : : Majority = 1 << 10, //!< Majority of values 62 : : Variety = 1 << 11, //!< Variety (count of distinct) values 63 : : FirstQuartile = 1 << 12, //!< First quartile 64 : : ThirdQuartile = 1 << 13, //!< Third quartile 65 : : InterQuartileRange = 1 << 14, //!< Inter quartile range (IQR) 66 : : First = 1 << 16, //!< First value (since QGIS 3.6) 67 : : Last = 1 << 17, //!< Last value (since QGIS 3.6) 68 : : All = Count | CountMissing | Sum | Mean | Median | StDev | Max | Min | Range | Minority | Majority | Variety | FirstQuartile | ThirdQuartile | InterQuartileRange | First | Last 69 : : }; 70 : : Q_DECLARE_FLAGS( Statistics, Statistic ) 71 : : 72 : : /** 73 : : * Constructor for QgsStatisticalSummary 74 : : * \param stats flags for statistics to calculate 75 : : */ 76 : : QgsStatisticalSummary( QgsStatisticalSummary::Statistics stats = QgsStatisticalSummary::All ); 77 : : 78 : 0 : virtual ~QgsStatisticalSummary() = default; 79 : : 80 : : /** 81 : : * Returns flags which specify which statistics will be calculated. Some statistics 82 : : * are always calculated (e.g., sum, min and max). 83 : : * \see setStatistics 84 : : */ 85 : : Statistics statistics() const { return mStatistics; } 86 : : 87 : : /** 88 : : * Sets flags which specify which statistics will be calculated. Some statistics 89 : : * are always calculated (e.g., sum, min and max). 90 : : * \param stats flags for statistics to calculate 91 : : * \see statistics 92 : : */ 93 : : void setStatistics( QgsStatisticalSummary::Statistics stats ); 94 : : 95 : : /** 96 : : * Resets the calculated values 97 : : */ 98 : : void reset(); 99 : : 100 : : /** 101 : : * Calculates summary statistics for a list of values 102 : : * \param values list of doubles 103 : : */ 104 : : void calculate( const QList<double> &values ); 105 : : 106 : : /** 107 : : * Adds a single value to the statistics calculation. Calling this method 108 : : * allows values to be added to the calculation one at a time. For large 109 : : * quantities of values this may be more efficient then first adding all the 110 : : * values to a list and calling calculate(). 111 : : * \param value value to add 112 : : * \note call reset() before adding the first value using this method 113 : : * to clear the results from any previous calculations 114 : : * \note finalize() must be called after adding the final value and before 115 : : * retrieving calculated statistics. 116 : : * \see calculate() 117 : : * \see addVariant() 118 : : * \see finalize() 119 : : * \since QGIS 2.16 120 : : */ 121 : : void addValue( double value ); 122 : : 123 : : /** 124 : : * Adds a single value to the statistics calculation. Calling this method 125 : : * allows values to be added to the calculation one at a time. For large 126 : : * quantities of values this may be more efficient then first adding all the 127 : : * values to a list and calling calculate(). 128 : : * \param value variant containing to add. Non-numeric values are treated as null. 129 : : * \note call reset() before adding the first value using this method 130 : : * to clear the results from any previous calculations 131 : : * \note finalize() must be called after adding the final value and before 132 : : * retrieving calculated statistics. 133 : : * \see addValue() 134 : : * \see calculate() 135 : : * \see finalize() 136 : : * \since QGIS 2.16 137 : : */ 138 : : void addVariant( const QVariant &value ); 139 : : 140 : : /** 141 : : * Must be called after adding all values with addValues() and before retrieving 142 : : * any calculated statistics. 143 : : * \see addValue() 144 : : * \see addVariant() 145 : : * \since QGIS 2.16 146 : : */ 147 : : void finalize(); 148 : : 149 : : /** 150 : : * Returns the value of a specified statistic 151 : : * \param stat statistic to return 152 : : * \returns calculated value of statistic. A NaN value may be returned for invalid 153 : : * statistics. 154 : : */ 155 : : double statistic( QgsStatisticalSummary::Statistic stat ) const; 156 : : 157 : : /** 158 : : * Returns calculated count of values 159 : : */ 160 : 0 : int count() const { return mCount; } 161 : : 162 : : /** 163 : : * Returns the number of missing (null) values 164 : : * \since QGIS 2.16 165 : : */ 166 : : int countMissing() const { return mMissing; } 167 : : 168 : : /** 169 : : * Returns calculated sum of values 170 : : */ 171 : 0 : double sum() const { return mSum; } 172 : : 173 : : /** 174 : : * Returns calculated mean of values. A NaN value may be returned if the mean cannot 175 : : * be calculated. 176 : : */ 177 : 0 : double mean() const { return mMean; } 178 : : 179 : : /** 180 : : * Returns calculated median of values. This is only calculated if Statistic::Median has 181 : : * been specified in the constructor or via setStatistics. A NaN value may be returned if the median cannot 182 : : * be calculated. 183 : : */ 184 : : double median() const { return mMedian; } 185 : : 186 : : /** 187 : : * Returns calculated minimum from values. A NaN value may be returned if the minimum cannot 188 : : * be calculated. 189 : : */ 190 : 0 : double min() const { return mMin; } 191 : : 192 : : /** 193 : : * Returns calculated maximum from values. A NaN value may be returned if the maximum cannot 194 : : * be calculated. 195 : : */ 196 : 0 : double max() const { return mMax; } 197 : : 198 : : /** 199 : : * Returns calculated range (difference between maximum and minimum values). A NaN value may be returned if the range cannot 200 : : * be calculated. 201 : : */ 202 : : double range() const { return std::isnan( mMax ) || std::isnan( mMin ) ? std::numeric_limits<double>::quiet_NaN() : mMax - mMin; } 203 : : 204 : : /** 205 : : * Returns the first value obtained. A NaN value may be returned if no values were encountered. 206 : : * 207 : : * \see last() 208 : : * \since QGIS 3.6 209 : : */ 210 : : double first() const { return mFirst; } 211 : : 212 : : /** 213 : : * Returns the last value obtained. A NaN value may be returned if no values were encountered. 214 : : * 215 : : * \see first() 216 : : * \since QGIS 3.6 217 : : */ 218 : : double last() const { return mLast; } 219 : : 220 : : /** 221 : : * Returns population standard deviation. This is only calculated if Statistic::StDev has 222 : : * been specified in the constructor or via setStatistics. A NaN value may be returned if the standard deviation cannot 223 : : * be calculated. 224 : : * \see sampleStDev 225 : : */ 226 : : double stDev() const { return mStdev; } 227 : : 228 : : /** 229 : : * Returns sample standard deviation. This is only calculated if Statistic::StDev has 230 : : * been specified in the constructor or via setStatistics. A NaN value may be returned if the standard deviation cannot 231 : : * be calculated. 232 : : * \see stDev 233 : : */ 234 : : double sampleStDev() const { return mSampleStdev; } 235 : : 236 : : /** 237 : : * Returns variety of values. The variety is the count of unique values from the list. 238 : : * This is only calculated if Statistic::Variety has been specified in the constructor 239 : : * or via setStatistics. 240 : : */ 241 : : int variety() const { return mValueCount.count(); } 242 : : 243 : : /** 244 : : * Returns minority of values. The minority is the value with least occurrences in the list. 245 : : * This is only calculated if Statistic::Minority has been specified in the constructor 246 : : * or via setStatistics. If multiple values match, return the first value relative to the 247 : : * initial values order. A NaN value may be returned if the minority cannot be calculated. 248 : : * \see majority 249 : : */ 250 : : double minority() const { return mMinority; } 251 : : 252 : : /** 253 : : * Returns majority of values. The majority is the value with most occurrences in the list. 254 : : * This is only calculated if Statistic::Majority has been specified in the constructor 255 : : * or via setStatistics. If multiple values match, return the first value relative to the 256 : : * initial values order. A NaN value may be returned if the minority cannot be calculated. 257 : : * \see minority 258 : : */ 259 : : double majority() const { return mMajority; } 260 : : 261 : : /** 262 : : * Returns the first quartile of the values. The quartile is calculated using the 263 : : * "Tukey's hinges" method. A NaN value may be returned if the first quartile cannot 264 : : * be calculated. 265 : : * \see thirdQuartile 266 : : * \see interQuartileRange 267 : : */ 268 : : double firstQuartile() const { return mFirstQuartile; } 269 : : 270 : : /** 271 : : * Returns the third quartile of the values. The quartile is calculated using the 272 : : * "Tukey's hinges" method. A NaN value may be returned if the third quartile cannot 273 : : * be calculated. 274 : : * \see firstQuartile 275 : : * \see interQuartileRange 276 : : */ 277 : : double thirdQuartile() const { return mThirdQuartile; } 278 : : 279 : : /** 280 : : * Returns the inter quartile range of the values. The quartiles are calculated using the 281 : : * "Tukey's hinges" method. A NaN value may be returned if the IQR cannot 282 : : * be calculated. 283 : : * \see firstQuartile 284 : : * \see thirdQuartile 285 : : */ 286 : 0 : double interQuartileRange() const { return std::isnan( mThirdQuartile ) || std::isnan( mFirstQuartile ) ? std::numeric_limits<double>::quiet_NaN() : mThirdQuartile - mFirstQuartile; } 287 : : 288 : : /** 289 : : * Returns the friendly display name for a \a statistic. 290 : : * \see shortName() 291 : : */ 292 : : static QString displayName( QgsStatisticalSummary::Statistic statistic ); 293 : : 294 : : /** 295 : : * Returns a short, friendly display name for a \a statistic, suitable for use in a field name. 296 : : * \see displayName() 297 : : * \since QGIS 3.6 298 : : */ 299 : : static QString shortName( QgsStatisticalSummary::Statistic statistic ); 300 : : 301 : : private: 302 : : 303 : : Statistics mStatistics; 304 : : 305 : : int mCount; 306 : : int mMissing; 307 : : double mSum; 308 : : double mMean; 309 : : double mMedian; 310 : : double mMin; 311 : : double mMax; 312 : : double mStdev; 313 : : double mSampleStdev; 314 : : double mMinority; 315 : : double mMajority; 316 : : double mFirstQuartile; 317 : : double mThirdQuartile; 318 : : double mFirst; 319 : : double mLast; 320 : : QMap< double, int > mValueCount; 321 : : QList< double > mValues; 322 : : bool mRequiresAllValueStorage = false; 323 : : bool mRequiresHisto = false; 324 : : }; 325 : : 326 : 0 : Q_DECLARE_OPERATORS_FOR_FLAGS( QgsStatisticalSummary::Statistics ) 327 : : 328 : : #endif // QGSSTATISTICALSUMMARY_H