297 lines
8.5 KiB
C++
297 lines
8.5 KiB
C++
/*
|
|
* string_utils.cpp
|
|
*
|
|
* Created by Bob Polis on 14-11-2014.
|
|
* Copyright 2014 Thalictrum. All rights reserved.
|
|
*
|
|
*/
|
|
|
|
#include "string_utils.hpp"

#include <limits.h>
#include <sys/stat.h>

#include <algorithm>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <memory>

#include <libscerror.hpp>
#include <uninorm.h>
|
|
using namespace std;
|
|
|
|
// Splits `str` at every occurrence of the literal separator `sep`.
//
// Returns the pieces in order of appearance. Adjacent separators produce
// empty components, and the result always holds at least one element
// (the whole of `str` when the separator does not occur).
// An empty `sep` is treated as "no separator": the result is `{str}`.
vector<string> sc::split(const string& str, const string& sep)
{
    vector<string> components;

    // find("") matches at every position without advancing, so the search
    // loop below would never terminate for an empty separator.
    if (sep.empty()) {
        components.push_back(str);
        return components;
    }

    string::size_type start = 0;
    string::size_type pos = str.find(sep);
    while (pos != string::npos) { // found separator => add substring to vector
        components.push_back(str.substr(start, pos - start));
        start = pos + sep.length(); // next search starts just after found separator
        pos = str.find(sep, start);
    }
    // at end of string => add last component
    components.push_back(str.substr(start));
    return components;
}
|
|
|
|
// Splits `str` around every match of the regular expression `sep` and
// returns the text found between the matches, in order.
vector<string> sc::split(const string& str, const regex& sep) {
    // Submatch index -1 makes the token iterator yield the stretches of
    // input that lie between matches instead of the matches themselves.
    const sregex_token_iterator first {str.begin(), str.end(), sep, -1};
    const sregex_token_iterator last {};
    return vector<string>(first, last);
}
|
|
|
|
// Concatenates `components`, inserting `join` between each pair of
// neighbouring elements. An empty vector yields an empty string.
string sc::join(const vector<string>& components, const string& join)
{
    string joined;
    bool first = true;
    for (const string& piece : components) {
        if (!first) {
            joined += join; // glue goes before every element except the first
        }
        joined += piece;
        first = false;
    }
    return joined;
}
|
|
|
|
// Returns a copy of `str` with every leading and trailing character that
// occurs in `del` removed. Characters from `del` in the interior are kept.
// If `str` consists solely of characters from `del` (or is empty) the
// result is empty; an empty `del` leaves `str` unchanged.
string sc::trim(const string& str, const string& del)
{
    // Locate the surviving range directly instead of erasing one character
    // at a time, which shifted the whole string on every leading erase.
    const string::size_type first = str.find_first_not_of(del);
    if (first == string::npos) {
        return string {}; // nothing but delimiter characters
    }
    const string::size_type last = str.find_last_not_of(del);
    return str.substr(first, last - first + 1);
}
|
|
|
|
// Reports whether something exists at `path`, as determined by stat(2).
//
// Returns false when stat() fails because the path does not exist (ENOENT)
// or because one of its leading components is not a directory (ENOTDIR).
// Any other stat() failure (for example a permission error) still yields
// true, since existence cannot be ruled out in that case.
bool sc::file_exists(const string& path)
{
    struct stat st;
    if (::stat(path.c_str(), &st) == 0) {
        return true;
    }
    // ENOTDIR means a prefix of the path is a regular file, so nothing can
    // exist underneath it; treat it like ENOENT.
    return errno != ENOENT && errno != ENOTDIR;
}
|
|
|
|
// Reads the whole file at `path` and returns its bytes as a string.
//
// The stream has failbit and badbit exceptions enabled, so a file that
// cannot be opened or read raises std::ios_base::failure.
string sc::file_get_contents(const string& path)
{
    // Binary mode: the size obtained from tellg() is a byte count, so the
    // read below must not be subject to text-mode newline translation.
    ifstream file {path, ios::in | ios::binary};
    file.exceptions(ios::failbit | ios::badbit);

    file.seekg(0, ios::end);
    const streamoff file_len {file.tellg()};
    file.seekg(0);

    // Read straight into the result instead of going through a scratch buffer.
    string contents(static_cast<string::size_type>(file_len), '\0');
    if (!contents.empty()) {
        file.read(&contents[0], static_cast<streamsize>(file_len));
    }
    return contents;
}
|
|
|
|
// Parses a simple INI-style file into a flat key => value map.
//
// - Lines starting with '[' (section headers) are skipped; sections are not
//   tracked, so keys from all sections share one map and later entries
//   overwrite earlier ones with the same key.
// - Lines without '=' are ignored.
// - The key is everything before the FIRST '=', passed through trim() with
//   its default delimiter set; the value is everything after it, with
//   surrounding spaces and double quotes removed. A value may therefore
//   itself contain '=' characters.
//
// A file that cannot be opened yields an empty map (only badbit raises).
map<string, string> sc::parse_ini_file(const string& path)
{
    map<string, string> result;
    string line;
    ifstream file {path};
    file.exceptions(/*ios::failbit |*/ ios::badbit); // it seems that getline() will set failbit when confronted with eof immediately
    while (getline(file, line)) {
        if (!line.empty() && line.front() == '[') continue;

        // Split on the first '=' only, so "key = a=b" keeps "a=b" as value.
        const string::size_type eq = line.find('=');
        if (eq == string::npos) continue;

        string key {trim(line.substr(0, eq))};
        string value {trim(line.substr(eq + 1), " \"")};
        result[key] = value;
    }
    return result;
}
|
|
|
|
// Replaces, in place, every occurrence of `what` in `target` with
// `replacement`. The search resumes after each inserted replacement, so
// text introduced by a replacement is never matched again.
// An empty `what` leaves `target` untouched.
void sc::replace_all(const string& what, const string& replacement, string& target) {
    // An empty pattern matches everywhere and would make the loop insert
    // `replacement` forever.
    if (what.empty()) return;

    string::size_type pos = target.find(what);
    while (pos != string::npos) {
        target.replace(pos, what.length(), replacement);
        pos = target.find(what, pos + replacement.length());
    }
}
|
|
|
|
// Returns a copy of `target` in which every occurrence of `what` has been
// replaced by `replacement`. `target` itself is not modified.
// An empty `what` matches nothing: the result equals `target`.
string sc::str_replace(const string& what, const string& replacement, const string& target)
{
    // find("") succeeds without advancing, which previously made the loop
    // spin forever on any non-empty target.
    if (what.empty()) return target;

    string result;
    string::size_type from {0};
    string::size_type pos {0};
    while ((pos = target.find(what, from)) != string::npos) {
        result.append(target, from, pos - from); // text before the match
        result += replacement;
        from = pos + what.length();              // continue after the match
    }
    result.append(target, from, string::npos);   // remaining tail
    return result;
}
|
|
|
|
// Wide-string variant: returns a copy of `target` in which every occurrence
// of `what` has been replaced by `replacement`. The search resumes after
// each inserted replacement, so replacement text is never matched again.
// An empty `what` matches nothing: the result equals `target`.
wstring sc::replace_all(const wstring& what,
                        const wstring& replacement,
                        const wstring& target)
{
    wstring term {target};
    // An empty pattern would match at every position and never terminate.
    if (what.empty()) return term;

    wstring::size_type pos = term.find(what);
    while (pos != wstring::npos) {
        term.replace(pos, what.length(), replacement);
        pos = term.find(what, pos + replacement.length());
    }
    return term;
}
|
|
|
|
void sc::create_dir(const string &path, int mode)
|
|
{
|
|
string dir;
|
|
vector<string> path_components {sc::split(path, "/")};
|
|
for (string comp : path_components) {
|
|
dir += comp + '/';
|
|
if (!sc::file_exists(dir)) {
|
|
throw_if_min1(::mkdir(dir.c_str(), mode));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the directory part of `path`.
//
// The path is first canonicalised with realpath(3); if that fails (for
// instance because the path does not exist) the path is used as given.
// The result is everything before the last '/'. A path located directly
// under the root directory yields "/". A path without any '/' is returned
// unchanged.
string sc::dirname(const string& path) {
    string result {path};
    vector<char> buf(PATH_MAX);
    if (::realpath(path.c_str(), buf.data())) {
        result = buf.data();
    }

    const auto pos = result.rfind('/');
    if (pos == string::npos) {
        return result; // no directory component at all
    }
    if (pos == 0) {
        return "/";    // parent is the root; cutting at 0 would give ""
    }
    return result.substr(0, pos);
}
|
|
|
|
// Returns the last '/'-separated component of `path`.
//
// When `remove_extension` is true, everything from the last '.' of that
// component onwards is dropped as well. Dots in directory names are never
// taken for an extension.
string sc::basename(const string& path, bool remove_extension) {
    // Isolate the file name first, so the extension search below cannot
    // match a dot that belongs to a directory component.
    const auto slash = path.rfind('/');
    string result {slash == string::npos ? path : path.substr(slash + 1)};

    if (remove_extension) {
        const auto dot = result.rfind('.');
        if (dot != string::npos) {
            result.erase(dot);
        }
    }
    return result;
}
|
|
|
|
// Returns `path` with its first '~' replaced by the value of the HOME
// environment variable. The path is returned unchanged when it contains
// no '~' or when HOME is not set.
string sc::replace_tilde(const string& path) {
    string result {path};
    const auto pos = result.find('~');
    if (pos != string::npos) {
        // getenv() returns a null pointer for an unset variable; building a
        // std::string from that is undefined behaviour.
        const char* home = ::getenv("HOME");
        if (home) {
            // Second argument is a COUNT: exactly the one '~' is replaced.
            result.replace(pos, 1, home);
        }
    }
    return result;
}
|
|
|
|
// Returns the extension of the file named by `path`, including the leading
// '.', or an empty string when the last path component contains no '.'.
string sc::filename_extension(const string& path) {
    const auto dot = path.rfind('.');
    if (dot == string::npos) {
        return "";
    }
    // A dot that precedes the last '/' belongs to a directory name, not to
    // the file name, so it does not start an extension.
    const auto slash = path.rfind('/');
    if (slash != string::npos && slash > dot) {
        return "";
    }
    return path.substr(dot);
}
|
|
|
|
// Resolves the location of the tool called `name`.
//
// - If `name` contains a '/', it is treated as a path and the result of
//   real_path(name) is returned.
// - Otherwise each ':'-separated directory of the PATH environment
//   variable is tried in order, and the first "<dir>/<name>" for which
//   file_exists() is true is returned.
//
// Returns an empty string when nothing is found or PATH is not set.
string sc::tool_path(const string& name) {
    if (name.find('/') != string::npos) { // name has (at least one) slash
        return sc::real_path(name);
    }

    // no slash in name => command from PATH?
    // getenv() yields a null pointer for an unset variable, which must not
    // be handed to the std::string constructor.
    const char* raw_path = ::getenv("PATH");
    if (!raw_path) {
        return "";
    }
    const string envpath {raw_path};
    for (const string& elem : sc::split(envpath, ":")) {
        string path {elem + '/' + name};
        if (sc::file_exists(path)) {
            return path;
        }
    }
    return "";
}
|
|
|
|
// Shortens `str` to fit `maxlen`, marking the cut with an ellipsis.
//
// A string whose length() does not exceed `maxlen` is returned unchanged.
// Otherwise the first `maxlen - 1` bytes are kept and U+2026 is appended;
// the cut position is moved back if necessary so that a multi-byte UTF-8
// sequence is never split. With `maxlen` == 0 the result is empty.
// The third parameter is currently unused.
string sc::truncate(const string& str, unsigned int maxlen, int /*how*/) {
    if (maxlen >= str.length()) return str; // already fits, exact length included
    if (maxlen == 0) return "";             // maxlen - 1 would wrap around

    string::size_type cut = maxlen - 1;
    // Bytes of the form 10xxxxxx continue a UTF-8 sequence; step back to
    // the start of that sequence so no partial character is emitted.
    while (cut > 0 && (static_cast<unsigned char>(str[cut]) & 0xC0) == 0x80) {
        --cut;
    }
    return str.substr(0, cut) + "\u2026"; // add ellipsis
}
|
|
|
|
// Returns a copy of `str` in which every char has been converted with the
// locale-aware std::tolower for `loc`. The conversion is applied char by
// char, so the result has the same length as the input.
string sc::lowercase(const string& str, const locale& loc) {
    string result(str.size(), '\0');
    transform(str.begin(), str.end(), result.begin(),
              [&loc](const char c) { return std::tolower(c, loc); });
    return result;
}
|
|
|
|
// Returns a copy of `str` in which every char has been converted with the
// locale-aware std::toupper for `loc`. The conversion is applied char by
// char, so the result has the same length as the input.
string sc::uppercase(const string& str, const locale& loc) {
    string result(str.size(), '\0');
    transform(str.begin(), str.end(), result.begin(),
              [&loc](const char c) { return std::toupper(c, loc); });
    return result;
}
|
|
|
|
// Checks whether `str` is a well-formed UTF-8 byte sequence.
//
// Accepts exactly the byte patterns that encode U+0000..U+10FFFF in their
// shortest form, excluding the surrogate range U+D800..U+DFFF:
//
//   00..7F
//   C2..DF  80..BF
//   E0      A0..BF  80..BF
//   E1..EC  80..BF  80..BF
//   ED      80..9F  80..BF
//   EE..EF  80..BF  80..BF
//   F0      90..BF  80..BF  80..BF
//   F1..F3  80..BF  80..BF  80..BF
//   F4      80..8F  80..BF  80..BF
//
// Overlong encodings, truncated sequences, stray continuation bytes and
// values above U+10FFFF are rejected. An empty string is valid.
bool sc::is_valid_utf8(const string& str) {
    const string::size_type len = str.length();
    string::size_type i = 0;

    while (i < len) {
        const unsigned char lead = static_cast<unsigned char>(str[i]);
        if (lead <= 0x7F) { // plain ASCII
            ++i;
            continue;
        }

        string::size_type trailing = 0;   // continuation bytes expected
        unsigned char lo = 0x80;          // allowed range of the byte that
        unsigned char hi = 0xBF;          // directly follows the lead byte

        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;                 // C0/C1 would be overlong
        } else if (lead == 0xE0) {
            trailing = 2; lo = 0xA0;      // below A0 would be overlong
        } else if (lead == 0xED) {
            trailing = 2; hi = 0x9F;      // A0..BF would be a surrogate
        } else if (lead >= 0xE1 && lead <= 0xEF) {
            trailing = 2;
        } else if (lead == 0xF0) {
            trailing = 3; lo = 0x90;      // below 90 would be overlong
        } else if (lead >= 0xF1 && lead <= 0xF3) {
            trailing = 3;
        } else if (lead == 0xF4) {
            trailing = 3; hi = 0x8F;      // above 8F exceeds U+10FFFF
        } else {
            return false;                 // continuation byte or invalid lead
        }

        if (len - i <= trailing) {
            return false;                 // sequence cut off by end of input
        }

        const unsigned char second = static_cast<unsigned char>(str[i + 1]);
        if (second < lo || second > hi) {
            return false;
        }
        for (string::size_type k = 2; k <= trailing; ++k) {
            if ((static_cast<unsigned char>(str[i + k]) & 0xC0) != 0x80) {
                return false;
            }
        }
        i += trailing + 1;
    }
    return true;
}
|
|
|
|
// Returns the canonical absolute form of `path` as produced by
// realpath(3), or an empty string when realpath() fails.
string sc::real_path(const string& path) {
    vector<char> resolved(PATH_MAX); // buffer size realpath() expects
    const char* const canonical = ::realpath(path.c_str(), resolved.data());
    return canonical ? string(resolved.data()) : string();
}
|
|
|
|
string sc::remove_accents(const string& text) {
|
|
vector<char> buf;
|
|
buf.resize(text.size() * 2);
|
|
size_t bufsize {buf.size()};
|
|
u8_normalize(UNINORM_NFD,
|
|
reinterpret_cast<const uint8_t*>(text.data()),
|
|
text.size(),
|
|
reinterpret_cast<uint8_t*>(buf.data()),
|
|
&bufsize);
|
|
|
|
// hack: now remove all bytes with a value higher than 127
|
|
auto it = remove_if(buf.begin(), buf.end(), [](uint8_t c) {
|
|
return c > 127;
|
|
});
|
|
buf.erase(it, buf.end());
|
|
return {buf.data()};
|
|
}
|