diff --git a/CMakeLists.txt b/CMakeLists.txt index 95c94a115871a2e99ece1ca7c7ce2646d3ab0b96..d12a189df26dfb34eb110b9df8319b4bc024e794 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,6 +34,11 @@ option(ARB_BUILD_VALIDATION_DATA "generate validation data" OFF) set(ARB_VALIDATION_DATA_DIR "${PROJECT_SOURCE_DIR}/validation/data" CACHE PATH "location of generated validation data") +# For sup::glob, just wrap POSIX glob(3)? +# Turn off for platforms without glob(3) in libc, e.g. Android Bionic. + +option(ARB_USE_POSIX_GLOB "wrap POSIX glob(3) for glob functionality" ON) + #---------------------------------------------------------- # Configure-time features for Arbor: #---------------------------------------------------------- @@ -47,6 +52,7 @@ option(ARB_WITH_PROFILING "use built-in profiling" OFF) option(ARB_WITH_ASSERTIONS "enable arb_assert() assertions in code" OFF) + #---------------------------------------------------------- # Global CMake configuration #---------------------------------------------------------- diff --git a/sup/CMakeLists.txt b/sup/CMakeLists.txt index 227a760b3379e5b6ab26c0657ed62680684a492c..f81a24e0cad6d3d3183ab0fde378f8ce296be708 100644 --- a/sup/CMakeLists.txt +++ b/sup/CMakeLists.txt @@ -1,10 +1,17 @@ set(sup-sources - glob.cpp + glob_basic.cpp ioutil.cpp json_meter.cpp path.cpp ) +if(ARB_USE_POSIX_GLOB) + list(APPEND sup-sources glob_posix.cpp) +else() + list(APPEND sup-sources glob_basic_wrap.cpp) +endif() + + add_library(arbor-sup ${sup-sources}) # Compile sup library with the same optimization flags as libarbor. 
diff --git a/sup/glob_basic.cpp b/sup/glob_basic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1be55b0e2443486d14b6056f6c3191cddc9ce9ba --- /dev/null +++ b/sup/glob_basic.cpp @@ -0,0 +1,247 @@ +#include <list> +#include <string> +#include <vector> + +#include <sup/glob.hpp> +#include <sup/path.hpp> + +namespace sup { + +struct glob_sup_fs_provider { + using action_type = std::function<void (const sup::path&)>; + + bool is_directory(const sup::path& p) const { + return sup::is_directory(p); + } + + bool exists(const sup::path& p) const { + return sup::exists(p); + } + + void for_each_directory(const sup::path& p, action_type action) const { + std::error_code ec; + for (const auto& e: get_iterator(p)) { + if (sup::is_directory(e.path(), ec)) action(e.path()); + } + } + + void for_each_entry(const sup::path& p, action_type action) const { + for (const auto& e: get_iterator(p)) { + action(e.path()); + } + } + +private: + static directory_iterator get_iterator(const sup::path& p) { + return directory_iterator(p.empty()? ".": p, + directory_options::skip_permission_denied); + } +}; + +glob_fs_provider glob_native_provider{glob_sup_fs_provider{}}; + +static bool match_char_class(const char*& p, char c) { + // Special cases for first character: + // ! => negate test defined from following char. + // - => treat '-' as literal. + // ] => treat ']' as literal. + + if (*p!='[') return false; + ++p; + + bool negate = false; + bool match = false; + + if (*p=='!') { + negate = true; + ++p; + } + + bool first = true; + char lrange = 0; + for (; !match && *p && (first || *p!=']'); ++p) { + + bool last = *p && p[1]==']'; + if (*p=='-' && lrange && !first && !last) { + match = c>=lrange && c<=*++p; + lrange = 0; + continue; + } + + lrange = *p; + match = c==*p; + first = false; + } + + while (*p && *p!=']') ++p; + if (!*p) return false; + + return match^negate; +} + +// Special exception for filename globbing: an initial period '.' 
can only be matched +// by an initial '.' in the pattern. + +bool glob_basic_match(const char* p, const char* t) { + // NFA state represented by pointers directly into pattern. + std::list<const char*> state = {p}; + + char c; + bool initial_dot = *t=='.'; + do { + c = *t++; + for (auto i = state.begin(); i!=state.end();) { + switch (**i) { + case '*': + if (initial_dot) goto fail; + if (i==state.begin() || *std::prev(i)!=*i) { + state.insert(i, *i); + } + while (**i=='*') ++*i; + continue; + case '?': + if (initial_dot) goto fail; + if (c) goto advance; + else goto fail; + case '[': + if (initial_dot) goto fail; + if (c && match_char_class(*i, c)) goto advance; + else goto fail; + case '\\': + ++*i; // fall-through + default: + if (**i==c) goto advance; + else goto fail; + } + + fail: + i = state.erase(i); + continue; + + advance: + *i += !!c; + ++i; + continue; + } + initial_dot = false; + } while (c && !state.empty()); + + return !state.empty() && !*state.back(); +} + +// Return first component, overwriting delimiter with NUL. +// Set pattern to beginning of next path component, skipping delimiters. 
+ +struct pattern_component { + const char* pattern = nullptr; + bool literal = false; + bool directory = false; +}; + +static pattern_component tokenize(char*& pattern) { + if (!*pattern) return {pattern, true, false}; + + char* s = nullptr; + char* p = pattern; + bool meta = false; + + do { + while (*p=='/') ++p; + + bool in_charclass = false; + bool escape = false; + for (;*p && *p!='/'; ++p) { + const bool escaped = escape; // a backslash escapes only the character that directly follows it + escape = false; + switch (*p) { + case '[': + if (!escaped) { + in_charclass = true; + meta = true; + } + break; + case '*': // fall-through: handled exactly like '?' + case '?': + if (!escaped) meta = true; + break; + case '\\': + if (!escaped && !in_charclass) escape = true; + break; + case ']': + if (in_charclass) in_charclass = false; + break; + default: ; + } + } + if (!meta) s = p; + } while (!meta && *p); + + pattern_component k = { pattern }; + k.literal = (bool)s; + + if (!s) s = p; + k.directory = !!*s; + + pattern = s; + while (*pattern=='/') ++pattern; + + *s = 0; + return k; +} + +// Return matching paths, unsorted, based on supplied pattern. +// Performs breadth-first search of the directory tree. + +std::vector<path> glob_basic(const std::string& pattern, const glob_fs_provider& fs) { + if (pattern.empty()) return {}; + + // Make a mutable copy for tokenization. 
+ std::vector<char> pcopy(pattern.begin(), pattern.end()); + pcopy.push_back(0); + + char* c = pcopy.data(); + if (!*c) return {}; + + std::vector<sup::path> paths, new_paths; + paths.push_back(""); + + if (*c=='/') { + while (*c=='/') ++c; + paths[0] = "/"; + } + + do { + pattern_component component = tokenize(c); + + if (component.literal) { + for (auto p: paths) { + p /= component.pattern; + + if (component.directory) { + if (fs.is_directory(p)) new_paths.push_back(std::move(p)); + } + else { + if (fs.exists(p)) new_paths.push_back(std::move(p)); + } + } + } + else { + auto push_if_match = [&new_paths, pattern = component.pattern](const sup::path& p) { + if (glob_basic_match(pattern, p.filename().c_str())) new_paths.push_back(p); + }; + + for (const auto& p: paths) { + if (component.directory) fs.for_each_directory(p.c_str(), push_if_match); + else fs.for_each_entry(p.c_str(), push_if_match); + } + } + + std::swap(paths, new_paths); + new_paths.clear(); + } while (*c); + + return paths; +} + +} // namespace sup + diff --git a/sup/glob_basic_wrap.cpp b/sup/glob_basic_wrap.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1765dd80a8ee6b75016d2b699e94511d4bc491fc --- /dev/null +++ b/sup/glob_basic_wrap.cpp @@ -0,0 +1,13 @@ +#include <string> +#include <vector> + +#include <sup/glob.hpp> + +namespace sup { + +std::vector<path> glob(const std::string& pattern) { + return glob_basic(pattern); +} + +} // namespace sup + diff --git a/sup/glob.cpp b/sup/glob_posix.cpp similarity index 100% rename from sup/glob.cpp rename to sup/glob_posix.cpp diff --git a/sup/include/sup/glob.hpp b/sup/include/sup/glob.hpp index aef8d68eb2497bf1038937d84e8c469a493d3cef..1f734f52fbb753926fad269b985e78c9ff2e3e91 100644 --- a/sup/include/sup/glob.hpp +++ b/sup/include/sup/glob.hpp @@ -1,14 +1,101 @@ #pragma once -// glob (3) wrapper -// TODO: emulate for not-entirely-POSIX platforms. +// Glob implementation via glob (3) wrapper or fallback implementation. 
+#include <functional> +#include <string> +#include <vector> #include <sup/path.hpp> namespace sup { +// Wrapper (provided by either glob_posix.cpp or +// glob_basic_wrap.cpp based on configuration.) + std::vector<path> glob(const std::string& pattern); +// Basic globber. +// +// Uses `glob_fs_provider` to provide required filesystem +// operations, defaults to implementation based around +// sup-provided directory iterators. + +struct glob_fs_provider { + using action_type = std::function<void (const sup::path&)>; + + template <typename Impl> + glob_fs_provider(Impl impl): inner_(new wrap<Impl>(std::move(impl))) {} // type-erases any Impl offering the four operations below + + glob_fs_provider(const glob_fs_provider& x): inner_(x.inner_->clone()) {} // copies the wrapped implementation through clone() + + bool is_directory(const sup::path& p) const { + return inner_->is_directory(p); + } + + bool exists(const sup::path& p) const { + return inner_->exists(p); + } + + void for_each_directory(const sup::path& p, action_type action) const { + inner_->for_each_directory(p, action); + } + + void for_each_entry(const sup::path& p, action_type action) const { + inner_->for_each_entry(p, action); + } + +private: + struct base { // abstract interface implemented by wrap<Impl> + virtual bool is_directory(const sup::path&) const = 0; + virtual bool exists(const sup::path&) const = 0; + virtual void for_each_directory(const sup::path&, action_type action) const = 0; + virtual void for_each_entry(const sup::path&, action_type action) const = 0; + virtual base* clone() const = 0; + virtual ~base() {} + }; + + template <typename Impl> + struct wrap: base { // forwards every call to the stored Impl + wrap(Impl impl): impl_(std::move(impl)) {} + + bool is_directory(const sup::path& p) const override { + return impl_.is_directory(p); + } + + bool exists(const sup::path& p) const override { + return impl_.exists(p); + } + + void for_each_directory(const sup::path& p, action_type action) const override { + impl_.for_each_directory(p, action); + } + + void for_each_entry(const sup::path& p, action_type action) const override { + impl_.for_each_entry(p, action); + } + + base* 
clone() const override { + return new wrap(impl_); // raw pointer; the copy constructor stores it in a unique_ptr + } + + Impl impl_; + }; + + std::unique_ptr<base> inner_; +}; + +extern glob_fs_provider glob_native_provider; + +std::vector<path> glob_basic(const std::string& pattern, const glob_fs_provider& = glob_native_provider); + +// Expose glob filename expression matcher for unit testing. +// +// Follows glob(7) description except for: +// * No named character class support, e.g. [:alpha:]. +// * Ignores LC_COLLATE for character ranges, and does not accommodate multibyte encodings. + +bool glob_basic_match(const char* p, const char* t); + } // namespace sup diff --git a/sup/include/sup/path.hpp b/sup/include/sup/path.hpp index 0502d9a037abf5eb5c8424851b2fc8e7a9828569..3c13aa2d440fdb0d47e02106278c86aae3362018 100644 --- a/sup/include/sup/path.hpp +++ b/sup/include/sup/path.hpp @@ -19,8 +19,10 @@ #include <cstddef> #include <exception> -#include <string> +#include <functional> #include <iostream> +#include <memory> +#include <string> #include <utility> #include <vector> @@ -84,7 +86,7 @@ public: // Append posix_path components template <typename Source> - posix_path& append(const Source& source) { + posix_path& append(const Source& source) { return append(posix_path(source)); } @@ -172,6 +174,30 @@ public: return canonical().compare(other.canonical()); } + posix_path filename() const { // text after the last '/', or the whole path when there is no '/' + auto i = p_.rfind('/'); + return i==std::string::npos? 
*this: posix_path(p_.substr(i+1)); + } + + bool has_filename() const { // false for an empty path or one ending in '/' + if (p_.empty()) return false; + + auto i = p_.rfind('/'); + return i==std::string::npos || i+1<p_.length(); + } + + posix_path parent_path() const { // text before the last '/'; "/" when that is the leading one; empty when there is none + auto i = p_.rfind('/'); + + if (i==0) return posix_path("/"); + else if (i==std::string::npos) return posix_path(); + else return posix_path(p_.substr(0, i)); + } + + bool has_parent_path() const { // true whenever the path contains a '/' + return p_.rfind('/')!=std::string::npos; + } + // Non-member functions friend posix_path operator/(const posix_path& a, const posix_path& b) { @@ -181,7 +207,7 @@ public: friend std::size_t hash_value(const posix_path& p) { std::hash<posix_path::string_type> hash; - return hash(p.p_); + return hash(p.canonical()); } friend std::ostream& operator<<(std::ostream& o, const posix_path& p) { @@ -337,31 +363,74 @@ private: // POSIX implementations of path queries (see path.cpp for implementations). -namespace posix { - file_status status(const path&, std::error_code&); - file_status symlink_status(const path&, std::error_code&); -} +file_status posix_status(const path&, std::error_code&) noexcept; +file_status posix_symlink_status(const path&, std::error_code&) noexcept; -inline file_status status(const path& p, std::error_code& ec) { - return posix::status(p, ec); +inline file_status status(const path& p, std::error_code& ec) noexcept { + return posix_status(p, ec); } -inline file_status symlink_status(const path& p, std::error_code& ec) { - return posix::symlink_status(p, ec); +inline file_status symlink_status(const path& p, std::error_code& ec) noexcept { + return posix_symlink_status(p, ec); } // Wrappers for `status()`, again following std::filesystem. 
inline file_status status(const path& p) { std::error_code ec; - auto r = ::sup::posix::status(p, ec); + auto r = status(p, ec); if (ec) { throw filesystem_error("status()", p, ec); } return r; } -inline bool is_directory(file_status s) { +inline file_status symlink_status(const path& p) { + std::error_code ec; + auto r = symlink_status(p, ec); + if (ec) { + throw filesystem_error("symlink_status()", p, ec); // name the call that actually failed + } + return r; +} + +inline bool exists(file_status s) noexcept { + return s.type()!=file_type::not_found; +} + +inline bool exists(const path& p) { + return exists(status(p)); +} + +inline bool exists(const path& p, std::error_code& ec) noexcept { + return exists(status(p, ec)); +} + +inline bool is_block_file(file_status s) noexcept { + return s.type()==file_type::block; +} + +inline bool is_block_file(const path& p) { + return is_block_file(status(p)); +} + +inline bool is_block_file(const path& p, std::error_code& ec) noexcept { + return is_block_file(status(p, ec)); +} + +inline bool is_character_file(file_status s) noexcept { + return s.type()==file_type::character; +} + +inline bool is_character_file(const path& p) { + return is_character_file(status(p)); +} + +inline bool is_character_file(const path& p, std::error_code& ec) noexcept { + return is_character_file(status(p, ec)); +} + +inline bool is_directory(file_status s) noexcept { return s.type()==file_type::directory; } @@ -369,11 +438,23 @@ inline bool is_directory(const path& p) { return is_directory(status(p)); } -inline bool is_directory(const path& p, std::error_code& ec) { +inline bool is_directory(const path& p, std::error_code& ec) noexcept { return is_directory(status(p, ec)); } -inline bool is_regular_file(file_status s) { +inline bool is_fifo(file_status s) noexcept { + return s.type()==file_type::fifo; +} + +inline bool is_fifo(const path& p) { + return is_fifo(status(p)); +} + +inline bool is_fifo(const path& p, std::error_code& ec) noexcept { + return is_fifo(status(p, ec)); +} + +inline bool 
is_regular_file(file_status s) noexcept { return s.type()==file_type::regular; } @@ -381,33 +462,193 @@ inline bool is_regular_file(const path& p) { return is_regular_file(status(p)); } -inline bool is_regular_file(const path& p, std::error_code& ec) { +inline bool is_regular_file(const path& p, std::error_code& ec) noexcept { return is_regular_file(status(p, ec)); } -inline bool is_character_file(file_status s) { - return s.type()==file_type::character; +inline bool is_socket(file_status s) noexcept { + return s.type()==file_type::socket; } -inline bool is_character_file(const path& p) { - return is_character_file(status(p)); +inline bool is_socket(const path& p) { + return is_socket(status(p)); } -inline bool is_character_file(const path& p, std::error_code& ec) { - return is_character_file(status(p, ec)); +inline bool is_socket(const path& p, std::error_code& ec) noexcept { + return is_socket(status(p, ec)); } -inline bool exists(file_status s) { - return s.type()!=file_type::not_found; +inline bool is_symlink(file_status s) noexcept { + return s.type()==file_type::symlink; } -inline bool exists(const path& p) { - return exists(status(p)); +inline bool is_symlink(const path& p) { + return is_symlink(symlink_status(p)); // status() follows links, so only symlink_status() can report a symlink } -inline bool exists(const path& p, std::error_code& ec) { - return exists(status(p, ec)); +inline bool is_symlink(const path& p, std::error_code& ec) noexcept { + return is_symlink(symlink_status(p, ec)); // as above: must not follow the link +} + +// Directory entries and iterators. +// (Subset of std::filesystem::directory_entry functionality.) 
+ +struct directory_entry { + directory_entry() = default; + directory_entry(const directory_entry&) = default; + directory_entry(directory_entry&&) noexcept = default; + + directory_entry& operator=(const directory_entry&) = default; + directory_entry& operator=(directory_entry&&) = default; // remove 'noexcept' for clang-4.0 + + explicit directory_entry(const path& p) { assign(p); } + directory_entry(const path& p, std::error_code& ec) { assign(p, ec); } + + // Set file type explicitly: interface for directory_iterator. + directory_entry(const path& p, file_type type, std::error_code& ec): + path_(p), status_(type) + { + if (type==file_type::unknown) { // no information from readdir() + refresh(ec); + } + } + + void refresh() { // re-query the cached status; the throwing status() overload reports failures + status_ = status(path_); + } + + void refresh(std::error_code &ec) { // as above, but failures are reported through ec + status_ = status(path_, ec); + } + + void assign(const path& p) { + path_ = p; + refresh(); + } + + void assign(const path& p, std::error_code &ec) { + path_ = p; + refresh(ec); + } + + const sup::path& path() const noexcept { return path_; } + operator const sup::path&() const noexcept { return path_; } + + bool is_block_file() const { return sup::is_block_file(status_); } // these tests read the cached status_ only + bool is_directory() const { return sup::is_directory(status_); } + bool is_character_file() const { return sup::is_character_file(status_); } + bool is_fifo() const { return sup::is_fifo(status_); } + bool is_regular_file() const { return sup::is_regular_file(status_); } + bool is_socket() const { return sup::is_socket(status_); } + bool is_symlink() const { return sup::is_symlink(status_); } + + bool operator==(const directory_entry& e) const { return path_==e.path_; } // comparisons consider the path only + bool operator!=(const directory_entry& e) const { return path_!=e.path_; } + bool operator<=(const directory_entry& e) const { return path_<=e.path_; } + bool operator>=(const directory_entry& e) const { return path_>=e.path_; } + bool operator<(const directory_entry& e) const { return path_<e.path_; } + bool operator>(const 
directory_entry& e) const { return path_>e.path_; } + +private: + sup::path path_; + file_status status_; +}; + +enum class directory_options: unsigned { // bit flags, combined with the operators below + none = 0, + follow_directory_symlink = 1, + skip_permission_denied = 2 +}; + +inline constexpr directory_options operator|(directory_options a, directory_options b) { + return directory_options(unsigned(a)|unsigned(b)); +} + +inline constexpr directory_options operator&(directory_options a, directory_options b) { + return directory_options(unsigned(a)&unsigned(b)); +} + +inline constexpr directory_options operator^(directory_options a, directory_options b) { + return directory_options(unsigned(a)^unsigned(b)); } +inline constexpr directory_options operator~(directory_options a) { + return directory_options(~unsigned(a)); +} + +inline constexpr directory_options& operator|=(directory_options& a, directory_options b) { + return a = directory_options(unsigned(a)|unsigned(b)); +} + +inline constexpr directory_options& operator&=(directory_options& a, directory_options b) { + return a = directory_options(unsigned(a)&unsigned(b)); +} + +inline constexpr directory_options& operator^=(directory_options& a, directory_options b) { + return a = directory_options(unsigned(a)^unsigned(b)); +} + +inline constexpr bool operator==(directory_options a, unsigned x) { // permits tests such as (opts&flag)==0 + return unsigned(a)==x; +} + +inline constexpr bool operator!=(directory_options a, unsigned x) { // permits tests such as (opts&flag)!=0 + return unsigned(a)!=x; +} + + +struct posix_directory_state; + +struct posix_directory_iterator { + using value_type = directory_entry; + using difference_type = std::ptrdiff_t; + using pointer = const directory_entry*; + using reference = const directory_entry&; + using iterator_category = std::input_iterator_tag; + + posix_directory_iterator() noexcept = default; + posix_directory_iterator(const posix_directory_iterator&) = default; + posix_directory_iterator(posix_directory_iterator&&) = default; + + posix_directory_iterator(const path&, directory_options); + 
posix_directory_iterator(const path& p): + posix_directory_iterator(p, directory_options::none) {} + + posix_directory_iterator(const path&, directory_options, std::error_code&); + posix_directory_iterator(const path& p, std::error_code& ec): + posix_directory_iterator(p, directory_options::none, ec) {} + + bool operator==(const posix_directory_iterator&) const; + bool operator!=(const posix_directory_iterator& i) const { + return !(*this==i); + } + + value_type operator*() const; // returns the current entry by value + const value_type* operator->() const; + + posix_directory_iterator& operator++(); + posix_directory_iterator& increment(std::error_code &ec); // non-throwing advance; errors are reported through ec + + posix_directory_iterator& operator=(const posix_directory_iterator&) = default; + +private: + std::shared_ptr<posix_directory_state> state_; // null in a default-constructed iterator +}; + +using directory_iterator = posix_directory_iterator; +inline directory_iterator begin(directory_iterator i) noexcept { return i; } // begin()/end() permit range-based for over an iterator +inline directory_iterator end(const directory_iterator& i) noexcept { return directory_iterator{}; } + } // namespace sup +// Hash based on canonical path string in namespace std. 
+namespace std { + template<> struct hash<::sup::path> { + using argument_type = ::sup::path; + using result_type = std::size_t; + result_type operator()(const argument_type& a) const noexcept { + return hash_value(a); + } + }; +} + diff --git a/sup/path.cpp b/sup/path.cpp index 441e5768795edf117125ef5202af259142d32f5e..5622f687b3b7b0cca53387ef1da2f6a809331f2d 100644 --- a/sup/path.cpp +++ b/sup/path.cpp @@ -1,6 +1,8 @@ // POSIX headers extern "C" { +#define _DEFAULT_SOURCE #include <sys/stat.h> +#include <dirent.h> } #include <cerrno> @@ -8,10 +10,9 @@ extern "C" { #include <sup/path.hpp> namespace sup { -namespace posix { namespace impl { - file_status status(const char* p, int r, struct stat& st, std::error_code& ec) { + file_status status(const char* p, int r, struct stat& st, std::error_code& ec) noexcept { if (!r) { // Success: ec.clear(); @@ -51,18 +52,128 @@ namespace impl { } // namespace impl -file_status status(const path& p, std::error_code& ec) { +file_status posix_status(const path& p, std::error_code& ec) noexcept { struct stat st; int r = stat(p.c_str(), &st); return impl::status(p.c_str(), r, st, ec); } -file_status symlink_status(const path& p, std::error_code& ec) { +file_status posix_symlink_status(const path& p, std::error_code& ec) noexcept { struct stat st; int r = lstat(p.c_str(), &st); return impl::status(p.c_str(), r, st, ec); } -} // namespace posix +struct posix_directory_state { + DIR* dir = nullptr; + path dir_path; + directory_entry entry; + + posix_directory_state() = default; + ~posix_directory_state() { + if (dir) closedir(dir); + } +}; + +posix_directory_iterator::posix_directory_iterator(const path& p, directory_options diropt) { + std::error_code ec; + *this = posix_directory_iterator(p, diropt, ec); + if (ec) throw filesystem_error("opendir()", p, ec); +} + +posix_directory_iterator::posix_directory_iterator(const path& p, directory_options diropt, std::error_code& ec): + state_(new posix_directory_state()) +{ + ec.clear(); + 
if ((state_->dir = opendir(p.c_str()))) { + state_->dir_path = p; + increment(ec); + return; + } + + if (errno==EACCES && (diropt&directory_options::skip_permission_denied)!=0) return; + ec = std::error_code(errno, std::generic_category()); +} + +static inline bool is_dot_or_dotdot(const char* s) { + return *s=='.' && (!s[1] || (s[1]=='.' && !s[2])); +} + +posix_directory_iterator& posix_directory_iterator::increment(std::error_code &ec) { + enum file_type type = file_type::none; + + ec.clear(); + if (!state_ || !state_->dir) return *this; // already at end (no state, or directory never opened) + + struct dirent* dp = nullptr; + do { + errno = 0; + dp = readdir(state_->dir); + } while (dp && is_dot_or_dotdot(dp->d_name)); + + if (!dp) { + if (errno) ec = std::error_code(errno, std::generic_category()); + state_.reset(); + } + else { +#if defined(DT_UNKNOWN) + switch (dp->d_type) { + case DT_BLK: + type = file_type::block; + break; + case DT_CHR: + type = file_type::character; + break; + case DT_DIR: + type = file_type::directory; + break; + case DT_FIFO: + type = file_type::fifo; + break; + case DT_LNK: + type = file_type::symlink; + break; + case DT_REG: // known regular file: no extra stat() needed + type = file_type::regular; + break; + case DT_SOCK: + type = file_type::socket; + break; + case DT_UNKNOWN: // fallthrough + default: + type = file_type::unknown; + } +#else + type = file_type::unknown; +#endif + state_->entry = directory_entry(state_->dir_path/path(dp->d_name), type, ec); + } + + return *this; +} + +posix_directory_iterator& posix_directory_iterator::operator++() { + // increment() releases state_ when readdir() fails, so copy the path needed for the error report first. + const path dir_path = state_? state_->dir_path: path(); + std::error_code ec; + increment(ec); + if (ec) throw filesystem_error("readdir()", dir_path, ec); + return *this; +} + +directory_entry posix_directory_iterator::operator*() const { + return state_->entry; +} + +const directory_entry* posix_directory_iterator::operator->() const { + return &(state_->entry); +} + +bool posix_directory_iterator::operator==(const posix_directory_iterator& x) const { + bool end1 = !state_ || !state_->dir; + bool end2 = !x.state_ || !x.state_->dir; + return end1 || end2? 
end1 && end2: state_->entry == x.state_->entry; +} + } // namespace sup diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 439a1034ffa11e848d4fe29c432582a4509ac7f4..f64b834bc3e9526ddbc5c07562656bb8166533a6 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -68,6 +68,7 @@ set(unit_sources test_filter.cpp test_fvm_layout.cpp test_fvm_lowered.cpp + test_glob_basic.cpp test_mc_cell_group.cpp test_lexcmp.cpp test_lif_cell_group.cpp diff --git a/test/unit/test_glob_basic.cpp b/test/unit/test_glob_basic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..114ee002aa689989519495a229d3a437d5995eca --- /dev/null +++ b/test/unit/test_glob_basic.cpp @@ -0,0 +1,227 @@ +#include "../gtest.h" + +#include <sup/glob.hpp> +#include <sup/path.hpp> + +using namespace sup; + +#include <algorithm> +#include <iterator> +#include <string> +#include <unordered_map> +#include <vector> + +TEST(glob, pattern) { + EXPECT_TRUE( glob_basic_match( "literal", "literal")); + EXPECT_FALSE(glob_basic_match("literal", "Literal")); + + EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "aA")); + EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "zZ")); + EXPECT_TRUE( glob_basic_match("[a-z][A-Z]", "bQ")); + EXPECT_FALSE(glob_basic_match("[a-z][A-Z]", "AA")); + EXPECT_FALSE(glob_basic_match("[a-z][A-Z]", "A@")); + + EXPECT_TRUE (glob_basic_match("[!0-9a]", "A")); + EXPECT_FALSE(glob_basic_match("[!0-9a]", "0")); + EXPECT_FALSE(glob_basic_match("[!0-9a]", "5")); + EXPECT_FALSE(glob_basic_match("[!0-9a]", "9")); + EXPECT_FALSE(glob_basic_match("[!0-9a]", "a")); + + EXPECT_TRUE (glob_basic_match("[-q]", "-")); + EXPECT_TRUE (glob_basic_match("[-q]", "q")); + EXPECT_FALSE(glob_basic_match("[-q]", "p")); + + EXPECT_TRUE (glob_basic_match("[q-]", "-")); + EXPECT_TRUE (glob_basic_match("[q-]", "q")); + EXPECT_FALSE(glob_basic_match("[q-]", "p")); + + EXPECT_TRUE (glob_basic_match("[!a-]", "b")); + EXPECT_FALSE(glob_basic_match("[!a-]", "a")); + EXPECT_FALSE(glob_basic_match("[!a-]", "-")); + + 
EXPECT_TRUE (glob_basic_match("[]-]z", "-z")); + EXPECT_TRUE (glob_basic_match("[]-]z", "]z")); + EXPECT_FALSE(glob_basic_match("[]-]z", "[z")); + + EXPECT_TRUE( glob_basic_match("?", "a")); + EXPECT_TRUE( glob_basic_match("?", " ")); + EXPECT_FALSE(glob_basic_match("?", " a")); + EXPECT_FALSE(glob_basic_match("?", "")); + + EXPECT_TRUE( glob_basic_match("a*b", "ab")); + EXPECT_TRUE( glob_basic_match("a*b", "abb")); + EXPECT_TRUE( glob_basic_match("a*b", "a01234b")); + EXPECT_FALSE(glob_basic_match("a*b", "ac")); + EXPECT_FALSE(glob_basic_match("a*b", "cb")); + + EXPECT_TRUE( glob_basic_match("a****b", "ab")); + EXPECT_TRUE( glob_basic_match("a****b", "a01b")); + EXPECT_FALSE(glob_basic_match("a****b", "a01")); + + EXPECT_TRUE( glob_basic_match("\\*", "*")); + EXPECT_FALSE(glob_basic_match("\\*", "z")); + + EXPECT_TRUE( glob_basic_match("\\?", "?")); + EXPECT_FALSE(glob_basic_match("\\?", "z")); + + EXPECT_TRUE( glob_basic_match("\\[p-q]", "[p-q]")); + EXPECT_FALSE(glob_basic_match("\\[p-q]", "\\p")); + EXPECT_TRUE( glob_basic_match("\\\\[p-q]", "\\p")); + + // Check for dodgy exponential behaviour... + EXPECT_FALSE( glob_basic_match( + "*x*x*x*x*x*x*x*x*x*x*x*x*x*x_", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxx")); + + // Check special-case handling for initial period: + + EXPECT_FALSE(glob_basic_match("*", ".foo")); + EXPECT_TRUE( glob_basic_match(".*", ".foo")); + + EXPECT_FALSE(glob_basic_match("??", ".f")); + EXPECT_TRUE( glob_basic_match(".?", ".f")); + + EXPECT_FALSE(glob_basic_match("[.a][.a][.a]", "..a")); + EXPECT_TRUE( glob_basic_match(".[.a][.a]", "..a")); + + EXPECT_TRUE( glob_basic_match("\\.*", ".foo")); +} + +struct mock_fs_provider { + using action_type = glob_fs_provider::action_type; + + std::unordered_multimap<path, path> tree; + + mock_fs_provider() = default; + + template <typename... Tail> + mock_fs_provider(const char* name, Tail... tail) { + add_path(name, tail...); + } + + void add_path() const {} + + template <typename... 
Tail> + void add_path(const char* name, Tail... tail) { + if (!*name) return; + + const char* p = *name=='/'? name+1: name; + + for (const char* c = p; *p; p = c++) { + while (*c && *c!='/') ++c; + + std::pair<path, path> entry{path{name, p}, path{name, c}}; + if (tree.find(entry.second)==tree.end()) { + tree.insert(entry); + tree.insert({entry.second, path{}}); + } + } + + add_path(tail...); + } + + static path canonical_key(const path& p) { + return p.has_filename()? p: p.parent_path(); + } + + bool is_directory(const path& p) const { + auto r = tree.equal_range(canonical_key(p)); + return r.first!=r.second && std::next(r.first)!=r.second; + } + + bool exists(const path& p) const { + return tree.find(canonical_key(p))!=tree.end(); + } + + void for_each_directory(const path& p, action_type action) const { + auto r = tree.equal_range(canonical_key(p)); + for (auto i = r.first; i!=r.second; ++i) { + auto entry = i->second; + if (entry.empty()) continue; + + auto s = tree.equal_range(entry); + if (s.first!=s.second && std::next(s.first)!=s.second) action(entry); + } + } + + void for_each_entry(const path& p, action_type action) const { + auto r = tree.equal_range(canonical_key(p)); + for (auto i = r.first; i!=r.second; ++i) { + auto entry = i->second; + if (!entry.empty()) action(entry); + } + } +}; + +std::vector<path> sort_glob(const char* pattern, const glob_fs_provider& fs) { + auto results = glob_basic(pattern, fs); + std::sort(results.begin(), results.end()); + return results; +} + +TEST(glob, simple_patterns) { + glob_fs_provider fs = mock_fs_provider{"fish", "fop", "barf", "barry", "tip"}; + + using pvector = std::vector<path>; + + EXPECT_EQ(pvector({"fish", "fop"}), sort_glob("f*", fs)); + EXPECT_EQ(pvector({"fop", "tip"}), sort_glob("??p", fs)); + EXPECT_EQ(pvector(), sort_glob("x*", fs)); +} + +TEST(glob, literals) { + glob_fs_provider fs = mock_fs_provider{ + "/abc/def/ghi", + "/abc/de", + "/xyz", + "pqrs/tuv/w", + "pqrs/tuv/wxy" + }; + + using pvector = 
std::vector<path>; + + EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/def/ghi", fs)); + EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/*/def/ghi", fs)); + EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/*/*/ghi", fs)); + EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/def/*", fs)); + EXPECT_EQ(pvector({"/abc/def/ghi"}), sort_glob("/abc/*/*", fs)); + EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("pqrs/tuv/w*", fs)); + EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("*/tuv/w*", fs)); + EXPECT_EQ(pvector({"pqrs/tuv/w", "pqrs/tuv/wxy"}), sort_glob("pqrs/t*/w*", fs)); +} + +TEST(glob, multidir) { + glob_fs_provider fs = mock_fs_provider{ + "abc/fab/x", + "abc/fab/yz", + "abc/flib/x", + "abc/flib/yz", + "abc/rib/x", + "def/rib/yz", + "def/fab/x", + "def/fab/yz", + "def/rib/x", + "def/rib/yz" + }; + + using pvector = std::vector<path>; + + EXPECT_EQ(pvector({"abc/fab/x", "abc/flib/x"}), sort_glob("*c/f*b/?", fs)); +} + +TEST(glob, dots) { + glob_fs_provider fs = mock_fs_provider{ + "f.oo/b.ar", "f.oo/.bar", + ".foo/b.ar", ".foo/.bar" + }; + + using pvector = std::vector<path>; + + EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("*/*", fs)); + EXPECT_EQ(pvector({".foo/b.ar"}), sort_glob(".*/*", fs)); + EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("f[.z]oo/*", fs)); + EXPECT_EQ(pvector({"f.oo/b.ar"}), sort_glob("f?oo/*", fs)); + EXPECT_EQ(pvector(), sort_glob("[.z]foo/*", fs)); + EXPECT_EQ(pvector(), sort_glob("?foo/*", fs)); +} + diff --git a/test/unit/test_path.cpp b/test/unit/test_path.cpp index c52c4bbc486ee7221f70632317ebd4b9816bbe5a..0f1e7d199ecdca664e5aa91ade56e1d6cf2293cb 100644 --- a/test/unit/test_path.cpp +++ b/test/unit/test_path.cpp @@ -189,6 +189,76 @@ TEST(path, posix_swap) { EXPECT_EQ("/bar", p2.native()); } +TEST(path, filename) { + auto filename = [](auto p) { return posix_path(p).filename().native(); }; + auto has_filename = [](auto p) { return posix_path(p).has_filename(); }; + + EXPECT_EQ("foo", filename("foo")); + 
EXPECT_TRUE(has_filename("foo")); + + EXPECT_EQ("foo", filename("bar/foo")); + EXPECT_TRUE(has_filename("bar/foo")); + + EXPECT_EQ("foo", filename("/bar/foo")); + EXPECT_TRUE(has_filename("/bar/foo")); + + EXPECT_EQ("foo", filename("./foo")); + EXPECT_TRUE(has_filename("./foo")); + + EXPECT_EQ("foo", filename("../foo")); + EXPECT_TRUE(has_filename("../foo")); + + EXPECT_EQ(".", filename(".")); + EXPECT_TRUE(has_filename(".")); + + EXPECT_EQ("", filename("foo/")); + EXPECT_FALSE(has_filename("foo/")); + + EXPECT_EQ("", filename("foo/bar/")); + EXPECT_FALSE(has_filename("foo/bar/")); + + EXPECT_EQ("", filename("/foo/bar/")); + EXPECT_FALSE(has_filename("/foo/bar/")); + + EXPECT_EQ("", filename("./")); + EXPECT_FALSE(has_filename("./")); + + EXPECT_EQ("", filename("/")); + EXPECT_FALSE(has_filename("/")); +} + +TEST(path, parent_path) { + auto parent_path = [](auto p) { return posix_path(p).parent_path().native(); }; + auto has_parent_path = [](auto p) { return posix_path(p).has_parent_path(); }; + + EXPECT_EQ("/abc", parent_path("/abc/")); + EXPECT_TRUE(has_parent_path("/abc/")); + + EXPECT_EQ("/abc", parent_path("/abc/def")); + EXPECT_TRUE(has_parent_path("/abc/def")); + + EXPECT_EQ("/abc", parent_path("/abc/.")); + EXPECT_TRUE(has_parent_path("/abc/.")); + + EXPECT_EQ("/", parent_path("/")); + EXPECT_TRUE(has_parent_path("/")); + + EXPECT_EQ("abc", parent_path("abc/def")); + EXPECT_TRUE(has_parent_path("abc/def")); + + EXPECT_EQ("abc/def", parent_path("abc/def/ghi")); + EXPECT_TRUE(has_parent_path("abc/def/ghi")); + + EXPECT_EQ("", parent_path("abc")); + EXPECT_FALSE(has_parent_path("abc")); + + EXPECT_EQ("", parent_path(".")); + EXPECT_FALSE(has_parent_path(".")); + + EXPECT_EQ("", parent_path("")); + EXPECT_FALSE(has_parent_path("")); +} + TEST(path, posix_iostream) { std::istringstream ss("/quux/xyzzy"); posix_path p; @@ -323,3 +393,31 @@ TEST(path, posix_status_perms) { EXPECT_NE(perms::none, root_perm&perms::others_exec); } +TEST(path, 
posix_directory_iterators) { + // Expect that /dev exists and that iterating on /dev will give + // an entry called 'null'. (This is guaranteed by POSIX.) + // + // Expect that the file type checks as given by is_block_file(), + // is_fifo() etc. will agree for directory iterators and paths. + + auto it = directory_iterator("/dev"); + EXPECT_NE(directory_iterator(), it); // Equal => empty directory. + + bool found_dev_null = false; + for (; it!=directory_iterator(); ++it) { + if (it->path()=="/dev/null") found_dev_null = true; + + file_status st = symlink_status(it->path()); + + // Check file type tests match up. + EXPECT_EQ(it->is_block_file(), is_block_file(st)); + EXPECT_EQ(it->is_directory(), is_directory(st)); + EXPECT_EQ(it->is_character_file(), is_character_file(st)); + EXPECT_EQ(it->is_fifo(), is_fifo(st)); + EXPECT_EQ(it->is_regular_file(), is_regular_file(st)); + EXPECT_EQ(it->is_socket(), is_socket(st)); + EXPECT_EQ(it->is_symlink(), is_symlink(st)); + } + + EXPECT_TRUE(found_dev_null); +}