00001
00002 // dataio 0.5 (beta) - Copyright (C) 2001, by Sadao Massago //
00003 // file: stringutil.h (miscellaneous string related functions) header //
00004 // ----------------------------------------------------------------------//
00005 // The dataio library and related files are licensed under the terms of //
00006 // GNU Lesser General Public License version 2.1 or later //
00007 // (see lesser.txt for detail). //
00008 // For information over GNU or GNU compatible license, visit the site //
00009 // http://www.gnu.org. //
00011
00013 // javadoc style documentation in way to generate class documentation //
00014 // using GNU GPL licensed doxygen utility. For doxygen, see the //
00015 // http://www.doxygen.org //
00017
00019 // currently, getline and putline are not used by the dataio
00020 // classes
00022 #ifndef _STRINGUTIL_H_
00023
00024 // #ifdef __plusplus
00025 #define _STRINGUTIL_H_
00026
00027 #include <string>
00028 #include <iostream>
00029 // #include <strstream>
00030 #include <vector>
00031
00032
00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS // doxygen skip this...
00034
00035 #ifdef _DEBUG_ // _DEBUG_ activate debugging mode on stringutil
00036 #define _STRINGUTILDEBUG_
00037 #endif
00038
00039 // #define _STRINGUTILNOTHROW_ // if error occur, use error message
00040 // #define _STRINGUTILNOMESSAGE_ // if error occur, use error message
00041
00042 // #ifdef _STRINGUTILDEBUG_ // activate message in debug mode
00043 // #undef _STRINGUTILNOMESSAGE_ // if error occur, use error message
00044 // #endif
00045
00046 #endif // #ifndef DOXYGEN_SHOULD_SKIP_THIS // doxygen skip this...
00047
00048 // defining group for datautil functions (for doxygen)
00049 // it's need to list function (that is not class member)
00050 // define datautil module group
00052 // starting group (for doxygen)
00055 // #include <errno.h> // for C routine error handle
00056
00057 // #define MAXUINT ((unsigned) ~0)
00058
00059 // #define _DEBUG_
00060 // #define _COMMAFLOAT_ // input float that use comma as dot
00061
00062 // getline works same as this function!!
00063 /* string &readlinestring(fstream &f)
00064 // use istream getline(istream &f, string &item, char delimiter='\n');
00065 {
00066 #define BUFFLEN 10
00067 char buff[BUFFLEN+1];
00068 static string line;
00069 line.erase();
00070 if(unnamed.size() != 0) // process unnamed field
00071 while(!f.eof())
00072 {
00073 f.get(buff, BUFFLEN);
00074 line += string(buff);
00075 if(!f.eof() && (f.get() != '\n'))
00076 f.unget(); // line reading not finished
00077 else return line;
00078 }
00079 return line;
00080 } // readlinestring
00081 */
00082
00084 // status access
// Default for the "print error" status flag.
// NOTE(review): presumably the initial value reported by
// stringutilprinterror() -- confirm in the implementation file.
00088 #define _STRINGUTILDEFAULTPRINTERROR true
// Default for the "throw exception" status flag.
// NOTE(review): presumably the initial value reported by
// stringutilthrowexception() -- confirm in the implementation file.
00091 #define _STRINGUTILDEFAULTTHROWEXCEPTION false
00092
// Default string delimiter: the double quote. Used as the default value of
// the `strdelimiter` parameters declared in this header.
00094 #define _STRINGUTILSTRDELIMITER '\"'
00095
// Default escape character: the backslash. Used as the default value of the
// `scapechar` parameters declared in this header.
00096 #define _STRINGUTILSCAPECHAR '\\'
00097
// Default comment character. Used as the default value of the `comment`
// parameters declared in this header.
00098 #define _STRINGUTILCOMMENTCHAR '#'
00099
// Default column separator: the horizontal tab. Used as the default value of
// the `separator` parameters declared in this header.
00100 #define _STRINGUTILCOLUMNSEPARATOR '\t'
00101
// Marker character for control codes.
// NOTE(review): presumably introduces caret notation such as ^A (see
// tocntrl()) -- confirm against the parser implementation.
00102 #define _STRINGUTILCNTRLMARK '^'
00103
// Marker character for hexadecimal codes.
// NOTE(review): exact escape syntax is not visible in this header -- confirm.
00104 #define _STRINGUTILHEXAMARK 'X'
00105
/**
 * Brace-enclosed initializer for a table of two-character entries. Each
 * entry pairs a plain letter with the control character produced by the C
 * escape sequence of that letter ('n' with '\n', 't' with '\t', ...).
 * The last entry, {'\0', '\0'}, marks the end of the table.
 *
 * The entries for double quote, quote and backslash are commented out.
 *
 * The escape character is written as '\033' (ESC, code 27): the '\e'
 * escape is a compiler extension and is not part of standard C or C++.
 */
#define _STRINGUTILCHARTABLE { \
    /* {'\"', '\"'}, */ /* double quote */ \
    /* {'\'', '\''}, */ /* quote */ \
    /* {'\\', '\\'}, */ /* backslash */ \
    {'a', '\a'},   /* Alarm beep */ \
    {'b', '\b'},   /* Back space */ \
    {'e', '\033'}, /* Escape character (ESC) */ \
    {'f', '\f'},   /* Form Feed (next page) */ \
    {'n', '\n'},   /* new line (next line) */ \
    {'r', '\r'},   /* Carriage Return (start of current line) */ \
    {'t', '\t'},   /* Tabulation */ \
    {'v', '\v'},   /* Vertical tabulation */ \
    {'\0', '\0'},  /* end of table */ \
}
00121
// Accessors for the "print error" status flag.
// NOTE(review): presumably the no-argument form returns the current setting
// and the bool form stores `status`; what the bool form returns (old or new
// value) is not visible here -- confirm in the implementation file.
00125 bool stringutilprinterror();
00126 bool stringutilprinterror(bool status);
00127
// Accessors for the "throw exception" status flag.
// NOTE(review): presumably the same getter/setter convention as
// stringutilprinterror() -- confirm in the implementation file.
00131 bool stringutilthrowexception();
00132 bool stringutilthrowexception(bool status);
00133
00135 // white char detections
00137
00138 // Latin and other usual extended characters:
00139 // those with a code larger than 160?
00140 //
/**
 * Tell whether @p c is an "extended" character, i.e. whether its value,
 * read as an unsigned byte, is greater than 160.
 *
 * The value goes through unsigned char before the comparison. Converting a
 * plain char straight to unsigned int sign-extends negative values where
 * char is signed, which made every byte in 128..160 compare greater than
 * 160 and gave different answers depending on the platform's char
 * signedness.
 *
 * @param c character to classify
 * @return true when the byte value of @p c is in 161..255
 */
inline bool isextended(char c)
{
    return static_cast<unsigned char>(c) > 160;
} // isextended
00145
00147 // obs.: isspace('\0') == false
00148 // isspace (POSIX) are: space, form-feed ('\f'), newline ('\n'),
00149 // carriage return ('\r'), horizontal tab ('\t'), and vertical tab ('\v').
00150 // isgraph() is printable except space
00151 // (space is space, but printable)
00152 // isprint() is all printable character
00153 inline bool iswhite(char c)
00154 {
00155 return !isgraph(c) && !isextended(c);
00156 } // iswhite
00157
/**
 * Convert @p c to a control character when possible.
 *
 * The candidate code is toupper(c) - 'A' + 1, so 'A'/'a' give 1,
 * 'Z'/'z' give 26, '[' gives 27 and '@' gives 0. When the candidate is a
 * control character according to iscntrl() it is returned; otherwise
 * @p c is returned unchanged.
 *
 * @param c character to convert
 * @return the control character derived from @p c, or @p c itself
 */
inline char tocntrl(char c)
{
    // toupper() and iscntrl() are only defined for values representable as
    // unsigned char (or EOF). Convert the input first and reject negative
    // candidates (e.g. '0' yields -16) before classifying them.
    const int code = toupper(static_cast<unsigned char>(c)) - 'A' + 1;

    if (code >= 0 && iscntrl(code))
        return static_cast<char>(code);

    return c;
}
00166
00168 // miscelaneous functions
00170
00171 // find the position of s2 in s1, starting at pos
/**
 * Find the position of @p s2 in @p s1, starting at @p pos.
 *
 * NOTE(review): the name suggests the comparison ignores letter case, and
 * the value returned when nothing is found is not visible in this header --
 * confirm both against the implementation file.
 *
 * The standard types are spelled with std:: so the header compiles without
 * a preceding using-directive.
 *
 * @param s1  string to search in
 * @param s2  string to search for
 * @param pos index in @p s1 where the search starts (default 0)
 */
unsigned stringcasefind(std::string const &s1, std::string const &s2, unsigned pos = 0);

/**
 * Find the last position of @p s2 in @p s1, starting at @p pos.
 *
 * NOTE(review): case handling, search direction relative to @p pos and the
 * not-found value are not visible in this header -- confirm against the
 * implementation file.
 *
 * @param s1  string to search in
 * @param s2  string to search for
 * @param pos starting index (default 0)
 */
unsigned stringcasefind_last_of(std::string const &s1, std::string const &s2, unsigned pos = 0);
00179
00180
00181 // destruct clearing and parsing
00186 string &parsestring(string &s, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00187
00189 vector <string> &parsestring(vector <string> &strlist, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00190
00192 string &reverseparsestring(string &s, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00193
00195 vector <string> &reverseparsestring(vector <string> &strlist, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00196
00207 unsigned findstringdelimiterclose(string const &s, unsigned pos=0, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00208
00210 bool stringdelimiterbalanced(string const &s, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR, unsigned pos1=0, unsigned pos2=(~0U));
00213 bool needdelimiter(string const &s, char strdelimiter=_STRINGUTILSTRDELIMITER, char scapechar=_STRINGUTILSCAPECHAR);
00214
00223 istream &gettextline(istream &f, string &line, char comment=_STRINGUTILCOMMENTCHAR,
00224 char strdelimiter=_STRINGUTILSTRDELIMITER,
00225 char scapechar=_STRINGUTILSCAPECHAR, char newlinedelimiter='\0');
00226
00231 // \sa enhacedgetline
00232 // istream &getline(istream &f, vector <string> &dataline,
00233 // char separator = _STRINGUTILCOLUMNSEPARATOR, char newline='\0');
00234
00235 // enhanced version of getline()
00236 // \sa getline
00237 istream &getline(istream &f, vector <string> &dataline, char separator = _STRINGUTILCOLUMNSEPARATOR,
00238 char comment=_STRINGUTILCOMMENTCHAR, char strdelimiter=_STRINGUTILSTRDELIMITER,
00239 char scapechar=_STRINGUTILSCAPECHAR, char newline='\0');
00240
00243
00244 // \sa enhacedgetlines
00245 // istream &getlines(istream &f, vector <vector<string> > &datalist,
00246 // char separator = _STRINGUTILCOLUMNSEPARATOR, char comment='\0', char newline='\0');
00247
00248 // enhanced version of getlines()
00249 // \sa getlines
00250 istream &getlines(istream &f, vector <vector <string> > &datalist,
00251 char separator = _STRINGUTILCOLUMNSEPARATOR,
00252 char comment=_STRINGUTILCOMMENTCHAR,
00253 char strdelimiter=_STRINGUTILSTRDELIMITER,
00254 char scapechar=_STRINGUTILSCAPECHAR, char newline='\0');
00255
00256
00257
00258 // vectorized putline
00262 ostream &putline(ostream &f, vector <string> &dataline,
00263 char separator = _STRINGUTILCOLUMNSEPARATOR, char newline='\0');
00264
00267 ostream &putlines(ostream &f, vector <vector<string> > &datalist,
00268 char separator = _STRINGUTILCOLUMNSEPARATOR, char newline='\0');
00269
00270 // vector <vector ,string> > transposition
00271 // extern vector <vector <string> > &transpose(vector <vector <string> >&data);
/**
 * Transpose a table of strings stored as a vector of rows.
 *
 * NOTE(review): presumably transposes @p data in place and returns it; the
 * handling of rows with different lengths is not visible in this header --
 * confirm against the implementation file.
 *
 * The standard types are spelled with std:: so the header compiles without
 * a preceding using-directive.
 *
 * @param data table to transpose
 * @return reference to a table of strings (presumably @p data)
 */
std::vector<std::vector<std::string> > &transpose(std::vector<std::vector<std::string> > &data);
00275
00276
00277 // ending group (for doxygen)
00280 #endif // #define _STRINGUTIL_H_
00281
1.2.8.1 written by Dimitri van Heesch,
© 1997-2001