// libscstring/string_utils.cpp
/*
* string_utils.cpp
*
* Created by Bob Polis on 14-11-2014.
* Copyright 2014 Thalictrum. All rights reserved.
*
*/
#include "string_utils.hpp"
#include <libscerror.hpp>
#include <sys/stat.h>
#include <limits.h>
#include <uninorm.h>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <algorithm>
#include <memory>
using namespace std;
// Splits `str` into the substrings that lie between occurrences of `sep`.
//
// Adjacent separators, and separators at either end of `str`, produce empty
// components, so the result always holds at least one element (an empty `str`
// yields {""}).
// An empty `sep` cannot delimit anything: `str` is returned as the single
// component. (Searching for "" matches at every position without advancing,
// which would otherwise loop forever.)
vector<string> sc::split(const string& str, const string& sep)
{
    vector<string> components;
    if (sep.empty()) {
        components.push_back(str);
        return components;
    }
    string::size_type start = 0;
    string::size_type pos = str.find(sep);
    while (pos != string::npos) { // found separator => add substring to vector
        components.push_back(str.substr(start, pos - start));
        start = pos + sep.length(); // next search starts just after found separator
        pos = str.find(sep, start);
    }
    // at end of string => add last component
    components.push_back(str.substr(start));
    return components;
}
// Splits `str` at every match of the regular expression `sep` and returns the
// pieces of text found between the matches.
vector<string> sc::split(const string& str, const regex& sep) {
    // Submatch index -1 makes the token iterator yield the text *between*
    // matches instead of the matches themselves.
    const sregex_token_iterator first {str.begin(), str.end(), sep, -1};
    const sregex_token_iterator last {};
    return vector<string>(first, last);
}
// Concatenates `components` into one string, inserting `join` between each
// pair of neighbouring elements. An empty vector yields an empty string.
string sc::join(const vector<string>& components, const string& join)
{
    string result;
    bool first = true;
    for (const string& component : components) {
        if (!first) {
            result += join; // separator goes before every element but the first
        }
        result += component;
        first = false;
    }
    return result;
}
// Returns a copy of `str` with every leading and trailing character that
// occurs in `del` removed. Characters from `del` in the interior are kept.
// If `str` consists solely of characters from `del`, the result is empty.
string sc::trim(const string& str, const string& del)
{
    const string::size_type first = str.find_first_not_of(del);
    if (first == string::npos) {
        return ""; // empty input, or nothing but characters from del
    }
    const string::size_type last = str.find_last_not_of(del);
    return str.substr(first, last - first + 1);
}
// Reports whether `path` names an existing file system entry.
//
// Returns false when stat() fails with ENOENT (no such entry) or ENOTDIR
// (a component of the path is not a directory, so nothing can live below it).
// Any other stat() failure (e.g. EACCES) is still reported as "exists",
// because it does not prove that the entry is absent.
bool sc::file_exists(const string& path)
{
    struct stat st;
    if (::stat(path.c_str(), &st) == 0) {
        return true;
    }
    return errno != ENOENT && errno != ENOTDIR;
}
// Reads the whole file at `path` and returns its bytes as a string.
//
// The stream is opened in binary mode so that the size obtained by seeking
// matches the number of bytes actually read. failbit and badbit are turned
// into exceptions, so a file that cannot be opened, sized or read raises
// std::ios_base::failure.
string sc::file_get_contents(const string& path)
{
    ifstream file {path, ios::in | ios::binary};
    file.exceptions(ios::failbit | ios::badbit); // throws right away if the open failed
    file.seekg(0, ios::end);
    const streamoff file_len = file.tellg();
    file.seekg(0);
    // read straight into the result instead of going through a scratch buffer
    string contents(static_cast<string::size_type>(file_len), '\0');
    if (!contents.empty()) {
        file.read(&contents[0], static_cast<streamsize>(file_len));
    }
    return contents;
}
// Reads a simple INI-style file and returns its `key = value` pairs.
//
// - Lines whose first character is '[' (section headers) are skipped, so keys
//   from all sections end up in one flat map; a later key overwrites an
//   earlier one with the same name.
// - Lines without '=' are ignored.
// - The line is split at the FIRST '=' only, so values may contain '='.
// - The key is trimmed with trim()'s default delimiters; the value is
//   stripped of surrounding spaces and double quotes.
map<string, string> sc::parse_ini_file(const string& path)
{
    map<string, string> result;
    string line;
    ifstream file {path};
    file.exceptions(/*ios::failbit |*/ ios::badbit); // it seems that getline() will set failbit when confronted with eof immediately
    while (getline(file, line)) {
        if (line.empty() || line[0] == '[') continue;
        const string::size_type eq = line.find('=');
        if (eq == string::npos) continue; // not a key/value line
        const string key {trim(line.substr(0, eq))};
        const string value {trim(line.substr(eq + 1), " \"")};
        result[key] = value;
    }
    return result;
}
// Replaces, in place, every occurrence of `what` in `target` by `replacement`.
//
// The search resumes after the inserted text, so a `replacement` that itself
// contains `what` is not expanded again. An empty `what` leaves `target`
// untouched (the empty string matches at every position, which would never
// terminate).
void sc::replace_all(const string& what, const string& replacement, string& target) {
    if (what.empty()) return;
    string::size_type pos = target.find(what);
    while (pos != string::npos) {
        target.replace(pos, what.length(), replacement);
        pos = target.find(what, pos + replacement.length());
    }
}
// Returns a copy of `target` in which every occurrence of `what` has been
// replaced by `replacement`. `target` itself is not modified.
// An empty `what` matches nothing: the copy is returned unchanged (searching
// for "" would otherwise never advance).
string sc::str_replace(const string& what, const string& replacement, const string& target)
{
    if (what.empty()) return target;
    string result;
    string::size_type from {0};
    string::size_type pos {0};
    while ((pos = target.find(what, from)) != string::npos) {
        result.append(target, from, pos - from); // text before the match
        result += replacement;
        from = pos + what.length();
    }
    result.append(target, from, string::npos); // remainder after the last match
    return result;
}
// Wide-string variant: returns a copy of `target` in which every occurrence
// of `what` has been replaced by `replacement`.
//
// The search resumes after the inserted text, so a `replacement` containing
// `what` is not expanded again. An empty `what` returns the copy unchanged
// (the empty string matches everywhere, which would never terminate).
wstring sc::replace_all(const wstring& what,
                        const wstring& replacement,
                        const wstring& target)
{
    wstring term {target};
    if (what.empty()) return term;
    wstring::size_type pos = term.find(what);
    while (pos != wstring::npos) {
        term.replace(pos, what.length(), replacement);
        pos = term.find(what, pos + replacement.length());
    }
    return term;
}
2021-07-02 11:38:57 +02:00
void sc::create_dir(const string &path, int mode)
2020-03-16 15:16:35 +01:00
{
string dir;
vector<string> path_components {sc::split(path, "/")};
for (string comp : path_components) {
dir += comp + '/';
if (!sc::file_exists(dir)) {
throw_if_min1(::mkdir(dir.c_str(), mode));
}
}
}
// Returns the directory part of `path`.
//
// The path is first canonicalised with realpath(); when that fails (for
// instance because the path does not exist) the path is used as given.
// Everything from the last '/' onward is then dropped. An entry directly
// below the root, and the root itself, yield "/". A path without any '/' is
// returned unchanged.
string sc::dirname(const string& path) {
    string result {path};
    vector<char> buf(PATH_MAX);
    if (::realpath(path.c_str(), buf.data())) {
        result = buf.data();
    }
    const string::size_type pos = result.rfind('/');
    if (pos == string::npos) {
        return result; // no directory part at all
    }
    if (pos == 0) {
        return "/"; // cutting at the leading slash would leave an empty string
    }
    return result.substr(0, pos);
}
// Returns the last component of `path` (the text after the final '/').
//
// With `remove_extension` set, the part of that component starting at its
// last '.' is removed as well. The dot is searched for in the last component
// only, so a dot inside a directory name never cuts the path short.
string sc::basename(const string& path, bool remove_extension) {
    string result {path};
    const string::size_type slash = result.rfind('/');
    if (slash != string::npos) {
        result.erase(0, slash + 1);
    }
    if (remove_extension) {
        const string::size_type dot = result.rfind('.');
        if (dot != string::npos) {
            result.erase(dot);
        }
    }
    return result;
}
// Returns `path` with its first '~' replaced by the value of the HOME
// environment variable. The path is returned unchanged when it contains no
// '~' or when HOME is not set.
string sc::replace_tilde(const string& path) {
    string result {path};
    const string::size_type pos = result.find('~');
    if (pos != string::npos) {
        const char* home = ::getenv("HOME"); // null when HOME is unset
        if (home != nullptr) {
            result.replace(pos, 1, home); // replace exactly the one '~' character
        }
    }
    return result;
}
// Returns the extension of the last component of `path`, including the
// leading '.', or an empty string when that component contains no '.'.
// A dot that occurs only in a directory name is not an extension.
string sc::filename_extension(const string& path) {
    const string::size_type dot = path.rfind('.');
    if (dot == string::npos) {
        return "";
    }
    const string::size_type slash = path.rfind('/');
    if (slash != string::npos && slash > dot) {
        return ""; // the last dot belongs to a directory, not to the file name
    }
    return path.substr(dot);
}
// Resolves the location of the tool `name`.
//
// - If `name` contains a '/', it is treated as a path and resolved with
//   sc::real_path().
// - Otherwise each directory listed in the PATH environment variable is
//   tried in order, and the first `<dir>/<name>` for which sc::file_exists()
//   returns true is returned.
// Returns an empty string when PATH is not set or no candidate matches.
string sc::tool_path(const string& name) {
    if (name.find('/') != string::npos) { // name has (at least one) slash
        return sc::real_path(name);
    }
    // no slash in name => command from PATH?
    const char* envpath = ::getenv("PATH"); // null when PATH is unset
    if (envpath == nullptr) {
        return "";
    }
    for (const string& elem : sc::split(envpath, ":")) {
        string path {elem + '/' + name};
        if (sc::file_exists(path)) {
            return path;
        }
    }
    return "";
}
// Shortens `str` to fewer than `maxlen` bytes of its own text and appends an
// ellipsis (U+2026) to mark the cut.
//
// - A string of at most `maxlen` bytes is returned unchanged.
// - `maxlen == 0` yields an empty string.
// - The cut position is moved back so that it never falls inside a UTF-8
//   multi-byte sequence.
// The third parameter is currently unused.
string sc::truncate(const string& str, unsigned int maxlen, int /*how*/) {
    if (maxlen >= str.length()) return str;
    if (maxlen == 0) return ""; // maxlen - 1 below would wrap around
    string::size_type cut = maxlen - 1;
    // 10xxxxxx is a continuation byte: step back to the start of the sequence
    while (cut > 0 && (static_cast<unsigned char>(str[cut]) & 0xC0) == 0x80) {
        --cut;
    }
    return str.substr(0, cut) + "\u2026"; // add ellipsis
}
// Returns a copy of `str` in which every char has been converted with
// std::tolower according to the locale `loc`.
string sc::lowercase(const string& str, const locale& loc) {
    string lowered(str);
    transform(lowered.begin(), lowered.end(), lowered.begin(),
              [&loc](char ch) { return std::tolower(ch, loc); });
    return lowered;
}
// Returns a copy of `str` in which every char has been converted with
// std::toupper according to the locale `loc`.
string sc::uppercase(const string& str, const locale& loc) {
    string raised(str);
    transform(raised.begin(), raised.end(), raised.begin(),
              [&loc](char ch) { return std::toupper(ch, loc); });
    return raised;
}
// Reports whether `str` is well-formed UTF-8 (RFC 3629).
//
// Rejected are: stray continuation bytes, truncated sequences, overlong
// encodings (lead bytes C0/C1, E0 followed by 80..9F, F0 followed by 80..8F),
// UTF-16 surrogates (ED followed by A0..BF) and code points above U+10FFFF
// (F4 followed by 90..BF, lead bytes F5..FF). An empty string is valid.
bool sc::is_valid_utf8(const string& str) {
    const string::size_type len = str.length();
    string::size_type i = 0;
    while (i < len) {
        const unsigned char lead = static_cast<unsigned char>(str[i]);
        if (lead <= 0x7F) { // 0bbbbbbb: plain ASCII
            ++i;
            continue;
        }
        string::size_type extra = 0; // number of continuation bytes expected
        unsigned char lo = 0x80;     // allowed range of the FIRST continuation byte;
        unsigned char hi = 0xBF;     // narrowed below for the special lead bytes
        if (lead >= 0xC2 && lead <= 0xDF) { // 110bbbbb (C0/C1 would be overlong)
            extra = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) { // 1110bbbb
            extra = 2;
            if (lead == 0xE0) lo = 0xA0;      // below that: overlong
            else if (lead == 0xED) hi = 0x9F; // above that: U+D800..U+DFFF
        } else if (lead >= 0xF0 && lead <= 0xF4) { // 11110bbb
            extra = 3;
            if (lead == 0xF0) lo = 0x90;      // below that: overlong
            else if (lead == 0xF4) hi = 0x8F; // above that: beyond U+10FFFF
        } else {
            return false; // continuation byte as lead, or C0, C1, F5..FF
        }
        if (len - i <= extra) {
            return false; // sequence runs past the end of the string
        }
        const unsigned char second = static_cast<unsigned char>(str[i + 1]);
        if (second < lo || second > hi) {
            return false;
        }
        for (string::size_type k = 2; k <= extra; ++k) { // remaining bytes: 10bbbbbb
            if ((static_cast<unsigned char>(str[i + k]) & 0xC0) != 0x80) {
                return false;
            }
        }
        i += extra + 1;
    }
    return true;
}
// Returns the canonical absolute form of `path` as computed by realpath(),
// or an empty string when realpath() fails.
string sc::real_path(const string& path) {
    vector<char> resolved(PATH_MAX);
    const char* canonical = ::realpath(path.c_str(), resolved.data());
    if (canonical == nullptr) {
        return string();
    }
    return string(canonical);
}
string sc::remove_accents(const string& text) {
vector<char> buf;
buf.resize(text.size() * 2);
size_t bufsize {buf.size()};
u8_normalize(UNINORM_NFD,
reinterpret_cast<const uint8_t*>(text.data()),
text.size(),
reinterpret_cast<uint8_t*>(buf.data()),
&bufsize);
// hack: now remove all bytes with a value higher than 127
auto it = remove_if(buf.begin(), buf.end(), [](uint8_t c) {
return c > 127;
});
buf.erase(it, buf.end());
return {buf.data()};
}