Branch data Line data Source code
1 : : /*************************************************************************** 2 : : qgsstringstatisticalsummary.h 3 : : ----------------------------- 4 : : Date : May 2016 5 : : Copyright : (C) 2016 by Nyall Dawson 6 : : Email : nyall dot dawson at gmail dot com 7 : : *************************************************************************** 8 : : * * 9 : : * This program is free software; you can redistribute it and/or modify * 10 : : * it under the terms of the GNU General Public License as published by * 11 : : * the Free Software Foundation; either version 2 of the License, or * 12 : : * (at your option) any later version. * 13 : : * * 14 : : ***************************************************************************/ 15 : : 16 : : #ifndef QGSSTRINGSTATISTICALSUMMARY_H 17 : : #define QGSSTRINGSTATISTICALSUMMARY_H 18 : : 19 : : #include <QSet> 20 : : #include <QVariantList> 21 : : 22 : : #include "qgis_core.h" 23 : : #include "qgis.h" 24 : : 25 : : /*************************************************************************** 26 : : * This class is considered CRITICAL and any change MUST be accompanied with 27 : : * full unit tests in test_qgsstringstatisticalsummary.py. 28 : : * See details in QEP #17 29 : : ****************************************************************************/ 30 : : 31 : : /** 32 : : * \ingroup core 33 : : * \class QgsStringStatisticalSummary 34 : : * \brief Calculator for summary statistics and aggregates for a list of strings. 35 : : * 36 : : * Statistics are calculated by calling calculate() and passing a list of strings. The 37 : : * individual statistics can then be retrieved using the associated methods. Note that not all statistics 38 : : * are calculated by default. Statistics which require slower computations are only calculated by 39 : : * specifying the statistic in the constructor or via setStatistics(). 40 : : * 41 : : * \since QGIS 2.16 42 : : */ 43 : : 44 : 0 : class CORE_EXPORT QgsStringStatisticalSummary 45 : : { 46 : : public: 47 : : 48 : : //! Enumeration of flags that specify statistics to be calculated 49 : : enum Statistic 50 : : { 51 : : Count = 1, //!< Count 52 : : CountDistinct = 2, //!< Number of distinct string values 53 : : CountMissing = 4, //!< Number of missing (null) values 54 : : Min = 8, //!< Minimum string value 55 : : Max = 16, //!< Maximum string value 56 : : MinimumLength = 32, //!< Minimum length of string 57 : : MaximumLength = 64, //!< Maximum length of string 58 : : MeanLength = 128, //!< Mean length of strings 59 : : Minority = 256, //!< Minority of strings 60 : : Majority = 512, //!< Majority of strings 61 : : All = Count | CountDistinct | CountMissing | Min | Max | MinimumLength | MaximumLength | MeanLength | Minority | Majority, //!< All statistics 62 : : }; 63 : : Q_DECLARE_FLAGS( Statistics, Statistic ) 64 : : 65 : : /** 66 : : * Constructor for QgsStringStatistics 67 : : * \param stats flags for statistics to calculate 68 : : */ 69 : : QgsStringStatisticalSummary( QgsStringStatisticalSummary::Statistics stats = QgsStringStatisticalSummary::All ); 70 : : 71 : : /** 72 : : * Returns flags which specify which statistics will be calculated. Some statistics 73 : : * are always calculated (e.g., count). 74 : : * \see setStatistics 75 : : */ 76 : : Statistics statistics() const { return mStatistics; } 77 : : 78 : : /** 79 : : * Sets flags which specify which statistics will be calculated. Some statistics 80 : : * are always calculated (e.g., count). 81 : : * \param stats flags for statistics to calculate 82 : : * \see statistics 83 : : */ 84 : : void setStatistics( QgsStringStatisticalSummary::Statistics stats ) { mStatistics = stats; } 85 : : 86 : : /** 87 : : * Resets the calculated values 88 : : */ 89 : : void reset(); 90 : : 91 : : /** 92 : : * Calculates summary statistics for an entire list of strings at once. 93 : : * \param values list of strings 94 : : * \see calculateFromVariants() 95 : : * \see addString() 96 : : */ 97 : : void calculate( const QStringList &values ); 98 : : 99 : : /** 100 : : * Calculates summary statistics for an entire list of variants at once. Any 101 : : * non-string variants will be ignored. 102 : : * \param values list of variants 103 : : * \see calculate() 104 : : * \see addValue() 105 : : */ 106 : : void calculateFromVariants( const QVariantList &values ); 107 : : 108 : : /** 109 : : * Adds a single string to the statistics calculation. Calling this method 110 : : * allows strings to be added to the calculation one at a time. For large 111 : : * quantities of strings this may be more efficient then first adding all the 112 : : * strings to a list and calling calculate(). 113 : : * \param string string to add 114 : : * \note call reset() before adding the first string using this method 115 : : * to clear the results from any previous calculations 116 : : * \note finalize() must be called after adding the final string and before 117 : : * retrieving calculated statistics. 118 : : * \see calculate() 119 : : * \see addValue() 120 : : * \see finalize() 121 : : */ 122 : : void addString( const QString &string ); 123 : : 124 : : /** 125 : : * Adds a single variant to the statistics calculation. Calling this method 126 : : * allows variants to be added to the calculation one at a time. For large 127 : : * quantities of variants this may be more efficient then first adding all the 128 : : * variants to a list and calling calculateFromVariants(). 129 : : * \param value variant to add 130 : : * \note call reset() before adding the first string using this method 131 : : * to clear the results from any previous calculations 132 : : * \note finalize() must be called after adding the final value and before 133 : : * retrieving calculated statistics. 134 : : * \see calculateFromVariants() 135 : : * \see finalize() 136 : : */ 137 : : void addValue( const QVariant &value ); 138 : : 139 : : /** 140 : : * Must be called after adding all strings with addString() and before retrieving 141 : : * any calculated string statistics. 142 : : * \see addString() 143 : : */ 144 : : void finalize(); 145 : : 146 : : /** 147 : : * Returns the value of a specified statistic 148 : : * \param stat statistic to return 149 : : * \returns calculated value of statistic 150 : : */ 151 : : QVariant statistic( QgsStringStatisticalSummary::Statistic stat ) const; 152 : : 153 : : /** 154 : : * Returns the calculated count of values. 155 : : */ 156 : : int count() const { return mCount; } 157 : : 158 : : /** 159 : : * Returns the number of distinct string values. 160 : : * \see distinctValues() 161 : : */ 162 : : int countDistinct() const { return mValues.keys().count(); } 163 : : 164 : : /** 165 : : * Returns the set of distinct string values. 166 : : * \see countDistinct() 167 : : */ 168 : : QSet< QString > distinctValues() const { return qgis::listToSet( mValues.keys() ); } 169 : : 170 : : /** 171 : : * Returns the number of missing (null) string values. 172 : : */ 173 : : int countMissing() const { return mCountMissing; } 174 : : 175 : : /** 176 : : * Returns the minimum (non-null) string value. 177 : : */ 178 : : QString min() const { return mMin; } 179 : : 180 : : /** 181 : : * Returns the maximum (non-null) string value. 182 : : */ 183 : : QString max() const { return mMax; } 184 : : 185 : : /** 186 : : * Returns the minimum length of strings. 187 : : */ 188 : : int minLength() const { return mMinLength; } 189 : : 190 : : /** 191 : : * Returns the maximum length of strings. 192 : : */ 193 : : int maxLength() const { return mMaxLength; } 194 : : 195 : : /** 196 : : * Returns the mean length of strings. 197 : : * \since QGIS 3.0 198 : : */ 199 : : double meanLength() const { return mMeanLength; } 200 : : 201 : : /** 202 : : * Returns the least common string. The minority is the value with least occurrences in the list 203 : : * This is only calculated if Statistic::Minority has been specified in the constructor 204 : : * or via setStatistics. If multiple values match, return the first value relative to the 205 : : * initial values order. 206 : : * \see majority 207 : : * \since QGIS 3.14 208 : : */ 209 : : QString minority() const { return mMinority; } 210 : : 211 : : /** 212 : : * Returns the most common string. The majority is the value with most occurrences in the list 213 : : * This is only calculated if Statistic::Majority has been specified in the constructor 214 : : * or via setStatistics. If multiple values match, return the first value relative to the 215 : : * initial values order. 216 : : * \see minority 217 : : * \since QGIS 3.14 218 : : */ 219 : : QString majority() const { return mMajority; } 220 : : 221 : : /** 222 : : * Returns the friendly display name for a statistic 223 : : * \param statistic statistic to return name for 224 : : */ 225 : : static QString displayName( QgsStringStatisticalSummary::Statistic statistic ); 226 : : 227 : : private: 228 : : 229 : : Statistics mStatistics; 230 : : 231 : : int mCount; 232 : : QMap< QString, int > mValues; 233 : : int mCountMissing; 234 : : QString mMin; 235 : : QString mMax; 236 : : int mMinLength; 237 : : int mMaxLength; 238 : : long mSumLengths; 239 : : double mMeanLength; 240 : : QString mMinority; 241 : : QString mMajority; 242 : : 243 : : void testString( const QString &string ); 244 : : }; 245 : : 246 : : Q_DECLARE_OPERATORS_FOR_FLAGS( QgsStringStatisticalSummary::Statistics ) 247 : : 248 : : #endif // QGSSTRINGSTATISTICALSUMMARY_H