Branch data Line data Source code
1 : : /*************************************************************************** 2 : : qgsstringutils.h 3 : : ---------------- 4 : : begin : June 2015 5 : : copyright : (C) 2015 by Nyall Dawson 6 : : email : nyall dot dawson at gmail dot com 7 : : *************************************************************************** 8 : : * * 9 : : * This program is free software; you can redistribute it and/or modify * 10 : : * it under the terms of the GNU General Public License as published by * 11 : : * the Free Software Foundation; either version 2 of the License, or * 12 : : * (at your option) any later version. * 13 : : * * 14 : : ***************************************************************************/ 15 : : 16 : : #include "qgis_core.h" 17 : : #include <QString> 18 : : #include <QRegExp> 19 : : #include <QList> 20 : : #include <QDomDocument> 21 : : #include <QFont> // for enum values 22 : : #include "qgis.h" 23 : : 24 : : #ifndef QGSSTRINGUTILS_H 25 : : #define QGSSTRINGUTILS_H 26 : : 27 : : #define FUZZY_SCORE_WORD_MATCH 5 28 : : #define FUZZY_SCORE_NEW_MATCH 3 29 : : #define FUZZY_SCORE_CONSECUTIVE_MATCH 4 30 : : 31 : : /** 32 : : * \ingroup core 33 : : * \class QgsStringReplacement 34 : : * \brief A representation of a single string replacement. 35 : : * \since QGIS 3.0 36 : : */ 37 : : 38 : 0 : class CORE_EXPORT QgsStringReplacement 39 : : { 40 : : 41 : : public: 42 : : 43 : : /** 44 : : * Constructor for QgsStringReplacement. 45 : : * \param match string to match 46 : : * \param replacement string to replace match with 47 : : * \param caseSensitive set to TRUE for a case sensitive match 48 : : * \param wholeWordOnly set to TRUE to match complete words only, or FALSE to allow partial word matches 49 : : */ 50 : : QgsStringReplacement( const QString &match, 51 : : const QString &replacement, 52 : : bool caseSensitive = false, 53 : : bool wholeWordOnly = false ); 54 : : 55 : : //! Returns the string matched by this object 56 : : QString match() const { return mMatch; } 57 : : 58 : : //! Returns the string to replace matches with 59 : : QString replacement() const { return mReplacement; } 60 : : 61 : : //! Returns TRUE if match is case sensitive 62 : : bool caseSensitive() const { return mCaseSensitive; } 63 : : 64 : : //! Returns TRUE if match only applies to whole words, or FALSE if partial word matches are permitted 65 : : bool wholeWordOnly() const { return mWholeWordOnly; } 66 : : 67 : : /** 68 : : * Processes a given input string, applying any valid replacements which should be made. 69 : : * \param input input string 70 : : * \returns input string with any matches replaced by replacement string 71 : : */ 72 : : QString process( const QString &input ) const; 73 : : 74 : : bool operator==( const QgsStringReplacement &other ) 75 : : { 76 : : return mMatch == other.mMatch 77 : : && mReplacement == other.mReplacement 78 : : && mCaseSensitive == other.mCaseSensitive 79 : : && mWholeWordOnly == other.mWholeWordOnly; 80 : : } 81 : : 82 : : /** 83 : : * Returns a map of the replacement properties. 84 : : * \see fromProperties() 85 : : */ 86 : : QgsStringMap properties() const; 87 : : 88 : : /** 89 : : * Creates a new QgsStringReplacement from an encoded properties map. 90 : : * \see properties() 91 : : */ 92 : : static QgsStringReplacement fromProperties( const QgsStringMap &properties ); 93 : : 94 : : private: 95 : : 96 : : QString mMatch; 97 : : 98 : : QString mReplacement; 99 : : 100 : : bool mCaseSensitive; 101 : : 102 : : bool mWholeWordOnly; 103 : : 104 : : QRegExp mRx; 105 : : }; 106 : : 107 : : 108 : : /** 109 : : * \ingroup core 110 : : * \class QgsStringReplacementCollection 111 : : * \brief A collection of string replacements (specified using QgsStringReplacement objects). 112 : : * \since QGIS 3.0 113 : : */ 114 : : 115 : 0 : class CORE_EXPORT QgsStringReplacementCollection 116 : : { 117 : : 118 : : public: 119 : : 120 : : /** 121 : : * Constructor for QgsStringReplacementCollection 122 : : * \param replacements initial list of string replacements 123 : : */ 124 : 0 : QgsStringReplacementCollection( const QList< QgsStringReplacement > &replacements = QList< QgsStringReplacement >() ) 125 : 0 : : mReplacements( replacements ) 126 : 0 : {} 127 : : 128 : : /** 129 : : * Returns the list of string replacements in this collection. 130 : : * \see setReplacements() 131 : : */ 132 : : QList< QgsStringReplacement > replacements() const { return mReplacements; } 133 : : 134 : : /** 135 : : * Sets the list of string replacements in this collection. 136 : : * \param replacements list of string replacements to apply. Replacements are applied in the 137 : : * order they are specified here. 138 : : * \see replacements() 139 : : */ 140 : : void setReplacements( const QList< QgsStringReplacement > &replacements ) 141 : : { 142 : : mReplacements = replacements; 143 : : } 144 : : 145 : : /** 146 : : * Processes a given input string, applying any valid replacements which should be made 147 : : * using QgsStringReplacement objects contained by this collection. Replacements 148 : : * are made in order of the QgsStringReplacement objects contained in the collection. 149 : : * \param input input string 150 : : * \returns input string with any matches replaced by replacement string 151 : : */ 152 : : QString process( const QString &input ) const; 153 : : 154 : : /** 155 : : * Writes the collection state to an XML element. 156 : : * \param elem target DOM element 157 : : * \param doc DOM document 158 : : * \see readXml() 159 : : */ 160 : : void writeXml( QDomElement &elem, QDomDocument &doc ) const; 161 : : 162 : : /** 163 : : * Reads the collection state from an XML element. 164 : : * \param elem DOM element 165 : : * \see writeXml() 166 : : */ 167 : : void readXml( const QDomElement &elem ); 168 : : 169 : : private: 170 : : 171 : : QList< QgsStringReplacement > mReplacements; 172 : : 173 : : 174 : : }; 175 : : 176 : : /** 177 : : * \ingroup core 178 : : * \class QgsStringUtils 179 : : * \brief Utility functions for working with strings. 180 : : * \since QGIS 2.11 181 : : */ 182 : : 183 : : class CORE_EXPORT QgsStringUtils 184 : : { 185 : : public: 186 : : 187 : : //! Capitalization options 188 : : enum Capitalization 189 : : { 190 : : MixedCase = QFont::MixedCase, //!< Mixed case, ie no change 191 : : AllUppercase = QFont::AllUppercase, //!< Convert all characters to uppercase 192 : : AllLowercase = QFont::AllLowercase, //!< Convert all characters to lowercase 193 : : ForceFirstLetterToCapital = QFont::Capitalize, //!< Convert just the first letter of each word to uppercase, leave the rest untouched 194 : : TitleCase = QFont::Capitalize + 1000, //!< Simple title case conversion - does not fully grammatically parse the text and uses simple rules only. Note that this method does not convert any characters to lowercase, it only uppercases required letters. Callers must ensure that input strings are already lowercased. 195 : : UpperCamelCase = QFont::Capitalize + 1001, //!< Convert the string to upper camel case. Note that this method does not unaccent characters. 196 : : }; 197 : : 198 : : /** 199 : : * Converts a string by applying capitalization rules to the string. 200 : : * \param string input string 201 : : * \param capitalization capitalization type to apply 202 : : * \returns capitalized string 203 : : * \since QGIS 3.0 204 : : */ 205 : : static QString capitalize( const QString &string, Capitalization capitalization ); 206 : : 207 : : /** 208 : : * Makes a raw string safe for inclusion as a HTML/XML string literal. 209 : : * 210 : : * This includes replacing '<' with '<', '>' with '>', '&' with '&', and 211 : : * any extended unicode characters with the XML style é encoded versions 212 : : * of these characters. 213 : : * \since QGIS 3.2 214 : : */ 215 : : static QString ampersandEncode( const QString &string ); 216 : : 217 : : /** 218 : : * Returns the Levenshtein edit distance between two strings. This equates to the minimum 219 : : * number of character edits (insertions, deletions or substitutions) required to change 220 : : * one string to another. 221 : : * \param string1 first string 222 : : * \param string2 second string 223 : : * \param caseSensitive set to TRUE for case sensitive comparison 224 : : * \returns edit distance. Lower distances indicate more similar strings. 225 : : */ 226 : : static int levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive = false ); 227 : : 228 : : /** 229 : : * Returns the longest common substring between two strings. This substring is the longest 230 : : * string that is a substring of the two input strings. For example, the longest common substring 231 : : * of "ABABC" and "BABCA" is "ABC". 232 : : * \param string1 first string 233 : : * \param string2 second string 234 : : * \param caseSensitive set to TRUE for case sensitive comparison 235 : : * \returns longest common substring 236 : : */ 237 : : static QString longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive = false ); 238 : : 239 : : /** 240 : : * Returns the Hamming distance between two strings. This equates to the number of characters at 241 : : * corresponding positions within the input strings where the characters are different. The input 242 : : * strings must be the same length. 243 : : * \param string1 first string 244 : : * \param string2 second string 245 : : * \param caseSensitive set to TRUE for case sensitive comparison 246 : : * \returns Hamming distance between strings, or -1 if strings are different lengths. 247 : : */ 248 : : static int hammingDistance( const QString &string1, const QString &string2, bool caseSensitive = false ); 249 : : 250 : : /** 251 : : * Returns the Soundex representation of a string. Soundex is a phonetic matching algorithm, 252 : : * so strings with similar sounds should be represented by the same Soundex code. 253 : : * \param string input string 254 : : * \returns 4 letter Soundex code 255 : : */ 256 : : static QString soundex( const QString &string ); 257 : : 258 : : /** 259 : : * Tests a \a candidate string to see how likely it is a match for 260 : : * a specified \a search string. Values are normalized between 0 and 1. 261 : : * \param candidate candidate string 262 : : * \param search search term string 263 : : * \return Normalized value of how likely is the \a search to be in the \a candidate 264 : : * \note Use this function only to calculate the fuzzy score between two strings and later compare these values, but do not depend on the actual numbers. They are implementation detail that may change in a future release. 265 : : * \since 3.14 266 : : */ 267 : : static double fuzzyScore( const QString &candidate, const QString &search ); 268 : : 269 : : /** 270 : : * Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ...> 271 : : * links. 272 : : * \param string string to insert links into 273 : : * \param foundLinks if specified, will be set to TRUE if any links were inserted into the string 274 : : * \returns string with inserted links 275 : : * \since QGIS 3.0 276 : : */ 277 : : static QString insertLinks( const QString &string, bool *foundLinks = nullptr ); 278 : : 279 : : /** 280 : : * Automatically wraps a \a string by inserting new line characters at appropriate locations in the string. 281 : : * 282 : : * The \a length argument specifies either the minimum or maximum length of lines desired, depending 283 : : * on whether \a useMaxLineLength is TRUE. If \a useMaxLineLength is TRUE, then the string will be wrapped 284 : : * so that each line ideally will not exceed \a length of characters. If \a useMaxLineLength is FALSE, then 285 : : * the string will be wrapped so that each line will ideally exceed \a length of characters. 286 : : * 287 : : * A custom delimiter can be specified to use instead of space characters. 288 : : * 289 : : * \since QGIS 3.4 290 : : */ 291 : : static QString wordWrap( const QString &string, int length, bool useMaxLineLength = true, const QString &customDelimiter = QString() ); 292 : : 293 : : /** 294 : : * Returns a string with characters having vertical representation form substituted. 295 : : * \param string input string 296 : : * \returns string with substitution applied 297 : : * \since QGIS 3.10 298 : : */ 299 : : static QString substituteVerticalCharacters( QString string ); 300 : : 301 : : /** 302 : : * Convert simple HTML to markdown. Only br, b and link are supported. 303 : : * \param html HTML to convert to markdown 304 : : * \returns String formatted as markdown 305 : : * \since QGIS 3.10 306 : : */ 307 : : static QString htmlToMarkdown( const QString &html ); 308 : : 309 : : }; 310 : : 311 : : #endif //QGSSTRINGUTILS_H