/*
 *  string_utils.cpp
 *
 *  Created by Bob Polis on 14-11-2014.
 *  Copyright 2014 Thalictrum. All rights reserved.
 *
 */

#include "string_utils.hpp"
#include <sys/stat.h>
#include <uninorm.h>
#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <fstream>

using namespace std;

// Splits str on every occurrence of the literal separator sep.
// Always returns at least one component; adjacent separators yield empty
// components. An empty separator cannot split anything, so the whole
// string is returned as the single component.
vector<string> sc::split(const string& str, const string& sep) {
    vector<string> components;
    if (sep.empty()) {
        // find("") matches at every position and would never advance
        components.push_back(str);
        return components;
    }
    string::size_type start = 0;
    string::size_type pos = str.find(sep);
    while (pos != string::npos) {
        // found separator => add substring to vector
        components.push_back(str.substr(start, pos - start));
        start = pos + sep.length(); // next search starts just after found separator
        pos = str.find(sep, start);
    }
    // at end of string => add last component
    components.push_back(str.substr(start));
    return components;
}

// Splits str on every match of the regular expression sep; the returned
// components are the pieces between the matches.
vector<string> sc::split(const string& str, const regex& sep) {
    vector<string> components;
    const sregex_token_iterator end {};
    // submatch index -1 selects the text between matches
    for (sregex_token_iterator p {str.begin(), str.end(), sep, -1}; p != end; ++p) {
        components.push_back(p->str());
    }
    return components;
}

// Concatenates components, inserting join between consecutive elements.
string sc::join(const vector<string>& components, const string& join) {
    string result;
    bool first = true;
    for (const string& comp : components) {
        if (!first) {
            result += join;
        }
        result += comp;
        first = false;
    }
    return result;
}

// Returns a copy of str with all leading and trailing characters that
// occur in del removed.
string sc::trim(const string& str, const string& del) {
    const string::size_type first = str.find_first_not_of(del);
    if (first == string::npos) {
        return ""; // empty, or consisting of delimiter characters only
    }
    const string::size_type last = str.find_last_not_of(del);
    return str.substr(first, last - first + 1);
}

// Returns false only when stat() reports ENOENT for path; any other
// outcome (success, or a different error) is reported as true.
bool sc::file_exists(const string& path) {
    struct stat st;
    if (::stat(path.c_str(), &st) == 0) {
        return true;
    }
    return errno != ENOENT;
}

// Reads the complete file at path and returns its contents.
// The stream has failbit and badbit exceptions enabled, so a file that
// cannot be opened or read results in an ios_base::failure exception.
string sc::file_get_contents(const string& path) {
    ifstream file {path};
    file.exceptions(ios::failbit | ios::badbit);
    file.seekg(0, ios::end);
    const ios::pos_type file_len {file.tellg()};
    file.seekg(0);
    // read straight into the result, avoiding an intermediate buffer
    string contents(static_cast<string::size_type>(file_len), '\0');
    if (!contents.empty()) {
        file.read(&contents[0], static_cast<streamsize>(contents.size()));
    }
    return contents;
}

// Parses "key = value" lines from the file at path into a map.
// Lines starting with '[' (section headers) and lines without '=' are
// skipped. The key is everything before the first '=', the value is
// everything after it, so values may themselves contain '='. Keys are
// trimmed with trim()'s default delimiters, values are stripped of
// surrounding spaces and double quotes. A file that cannot be opened
// yields an empty map.
map<string, string> sc::parse_ini_file(const string& path) {
    map<string, string> result;
    string line;
    ifstream file {path};
    file.exceptions(/*ios::failbit |*/ ios::badbit); // it seems that getline() will set failbit when confronted with eof immediately
    while (getline(file, line)) {
        if (line.empty() || line[0] == '[') continue;
        const string::size_type eq = line.find('=');
        if (eq == string::npos) continue;
        string key {trim(line.substr(0, eq))};
        string value {trim(line.substr(eq + 1), " \"")};
        result[key] = value;
    }
    return result;
}

// Replaces, in place, every occurrence of what in target by replacement.
// Replaced text is not rescanned. An empty what leaves target untouched.
void sc::replace_all(const std::string& what, const std::string& replacement, std::string& target) {
    if (what.empty()) return; // an empty pattern matches everywhere and would never terminate
    std::string::size_type pos = target.find(what);
    while (pos != std::string::npos) {
        target.replace(pos, what.length(), replacement);
        // continue after the inserted text, so it is never matched again
        pos = target.find(what, pos + replacement.length());
    }
}

// Returns a copy of target in which every occurrence of what is replaced
// by replacement. An empty what returns target unchanged.
string sc::str_replace(const string& what, const string& replacement, const string& target) {
    if (what.empty()) return target; // an empty pattern would never advance
    string result;
    string::size_type from {0};
    string::size_type pos {0};
    while ((pos = target.find(what, from)) != string::npos) {
        result.append(target, from, pos - from);
        result += replacement;
        from = pos + what.length();
    }
    result.append(target, from, string::npos);
    return result;
}

// Wide-string variant: returns a copy of target in which every occurrence
// of what is replaced by replacement. An empty what returns target
// unchanged.
wstring sc::replace_all(const std::wstring& what, const std::wstring& replacement, const std::wstring& target) {
    wstring term {target};
    if (what.empty()) return term; // an empty pattern matches everywhere and would never terminate
    wstring::size_type pos = term.find(what);
    while (pos != wstring::npos) {
        term.replace(pos, what.length(), replacement);
        pos = term.find(what, pos + replacement.length());
    }
    return term;
}

// Creates the directory at path, including any missing parent
// directories, each with the given mode. Every '/'-separated prefix that
// file_exists() reports as absent is created with mkdir(); the mkdir()
// result is passed to throw_if_min1().
void sc::create_dir(const std::string &path, int mode) {
    string dir;
    const vector<string> path_components {sc::split(path, "/")};
    for (const string& comp : path_components) {
        dir += comp + '/';
        if (!sc::file_exists(dir)) {
            throw_if_min1(::mkdir(dir.c_str(), mode));
        }
    }
}

// Returns the directory part of path: everything before the last '/'.
// The path is first resolved with realpath(); when that fails, the path
// is used as given. Without any '/', the (resolved) path itself is
// returned; for an entry directly below the root the result is empty.
string sc::dirname(const string& path) {
    string result {path};
    vector<char> buf;
    buf.resize(PATH_MAX);
    if (::realpath(path.c_str(), buf.data())) {
        result = buf.data();
    }
    const auto pos = result.rfind('/');
    if (pos != string::npos) {
        return result.substr(0, pos);
    }
    return result;
}

// Returns the last component of path (everything after the last '/').
// With remove_extension set, the part of that component starting at its
// last '.' is removed as well. Dots in directory names are ignored.
string sc::basename(const string& path, bool remove_extension) {
    const auto slash = path.rfind('/');
    string result {slash == string::npos ? path : path.substr(slash + 1)};
    if (remove_extension) {
        // only look for the dot inside the file name itself
        const auto dot = result.rfind('.');
        if (dot != string::npos) {
            result.erase(dot);
        }
    }
    return result;
}

// Returns a copy of path in which the first '~' is replaced by the value
// of the HOME environment variable. The path is returned unchanged when
// it contains no '~' or when HOME is not set.
string sc::replace_tilde(const string& path) {
    string result {path};
    const auto pos = result.find('~');
    if (pos != string::npos) {
        const char* home = ::getenv("HOME");
        if (home) {
            result.replace(pos, 1, home); // replace exactly the one tilde character
        }
    }
    return result;
}

// Returns the extension of the last component of path, including the
// leading '.', or an empty string when that component contains no '.'.
string sc::filename_extension(const string& path) {
    const auto dot = path.rfind('.');
    if (dot == string::npos) {
        return "";
    }
    const auto slash = path.rfind('/');
    if (slash != string::npos && slash > dot) {
        return ""; // the dot belongs to a directory name, not to the file name
    }
    return path.substr(dot);
}

// Resolves the location of the tool called name.
// A name containing a '/' is resolved with real_path(). Otherwise every
// directory listed in the PATH environment variable is tried in order,
// and the first candidate for which file_exists() returns true is
// returned. Returns an empty string when nothing is found or PATH is not
// set.
string sc::tool_path(const string& name) {
    if (name.find('/') != string::npos) { // name has (at least one) slash
        return sc::real_path(name);
    }
    // no slash in name => command from PATH?
    const char* env = ::getenv("PATH");
    if (!env) {
        return "";
    }
    const string envpath {env};
    const vector<string> paths {sc::split(envpath, ":")};
    for (const string& elem : paths) {
        string path {elem + '/' + name};
        if (sc::file_exists(path)) {
            return path;
        }
    }
    return "";
}

// Limits str to at most maxlen positions: a string that fits is returned
// unchanged, a longer one is cut to maxlen - 1 bytes followed by an
// ellipsis. A maxlen of 0 yields an empty string.
// Note: the cut is made on a byte index, so it may fall inside a
// multi-byte UTF-8 sequence.
string sc::truncate(const string& str, unsigned int maxlen, int /*how*/) {
    if (maxlen >= str.length()) return str;
    if (maxlen == 0) return ""; // avoid wrap-around of maxlen - 1
    return str.substr(0, maxlen - 1) + "\u2026"; // add ellipsis
}

// Returns a copy of str with every char converted to lower case
// according to loc.
string sc::lowercase(const string& str, const locale& loc) {
    string result;
    result.reserve(str.size());
    for (const char c : str) {
        result += tolower(c, loc);
    }
    return result;
}

// Returns a copy of str with every char converted to upper case
// according to loc.
string sc::uppercase(const string& str, const locale& loc) {
    string result;
    result.reserve(str.size());
    for (const char c : str) {
        result += toupper(c, loc);
    }
    return result;
}

// Checks whether str is structurally valid UTF-8 of at most 4 bytes per
// character: every lead byte must be followed by the right number of
// continuation bytes, and encoded surrogates (U+D800..U+DFFF) are
// rejected. Overlong encodings and values above U+10FFFF are not
// detected.
bool sc::is_valid_utf8(const string& str) {
    // From: http://www.zedwood.com/article/cpp-is-valid-utf8-string-function
    const string::size_type len {str.length()};
    for (string::size_type i = 0; i < len; ++i) {
        const unsigned char c = static_cast<unsigned char>(str[i]);
        int n = 0; // number of continuation bytes expected after c
        if (c <= 0x7f) n = 0; // 0bbbbbbb
        else if ((c & 0xE0) == 0xC0) n = 1; // 110bbbbb
        else if (c == 0xed && i + 1 < len
                 && (static_cast<unsigned char>(str[i + 1]) & 0xa0) == 0xa0) return false; // U+d800 to U+dfff
        else if ((c & 0xF0) == 0xE0) n = 2; // 1110bbbb
        else if ((c & 0xF8) == 0xF0) n = 3; // 11110bbb
        else return false;
        for (int j = 0; j < n; ++j) { // n bytes matching 10bbbbbb must follow
            if (++i == len || (static_cast<unsigned char>(str[i]) & 0xC0) != 0x80) {
                return false;
            }
        }
    }
    return true;
}

// Returns the path as resolved by realpath(), or an empty string when
// realpath() fails.
string sc::real_path(const string& path) {
    vector<char> buf;
    buf.resize(PATH_MAX);
    if (::realpath(path.c_str(), buf.data())) {
        return string(buf.data());
    }
    return "";
}

// Returns text reduced to plain ASCII: the text is decomposed (Unicode
// NFD), after which every byte above 127 is dropped. This removes
// combining accents, but also every other non-ASCII character.
// When normalization fails, the non-ASCII bytes are dropped from the
// input as given.
string sc::remove_accents(const string& text) {
    if (text.empty()) {
        return "";
    }
    vector<uint8_t> buf;
    buf.resize(text.size() * 2);
    size_t length {buf.size()};
    // u8_normalize() writes into buf when the result fits; otherwise it
    // returns a freshly allocated buffer that we have to free ourselves.
    // On return, length holds the number of bytes in the result.
    uint8_t* normalized = u8_normalize(UNINORM_NFD,
                                       reinterpret_cast<const uint8_t*>(text.data()), text.size(),
                                       buf.data(), &length);
    const uint8_t* src = normalized;
    if (!normalized) {
        // normalization failed => fall back to the raw input bytes
        src = reinterpret_cast<const uint8_t*>(text.data());
        length = text.size();
    }
    // hack: now remove all bytes with a value higher than 127
    string result;
    result.reserve(length);
    for (size_t i = 0; i < length; ++i) {
        if (src[i] <= 127) {
            result += static_cast<char>(src[i]);
        }
    }
    if (normalized && normalized != buf.data()) {
        std::free(normalized);
    }
    return result;
}