#include #include #include #include #include #include /** * We try to default to a different path style depending on the operating * system. So this should detect whether we should use windows or unix paths. */ #if defined(WIN32) || defined(_WIN32) || \ defined(__WIN32) && !defined(__CYGWIN__) static enum cwk_path_style path_style = CWK_STYLE_WINDOWS; #else static enum cwk_path_style path_style = CWK_STYLE_UNIX; #endif /** * This is a list of separators used in different styles. Windows can read * multiple separators, but it generally outputs just a backslash. The output * will always use the first character for the output. */ static const char *separators[] = { "\\/", // CWK_STYLE_WINDOWS "/" // CWK_STYLE_UNIX }; /** * A joined path represents multiple path strings which are concatenated, but * not (necessarily) stored in contiguous memory. The joined path allows to * iterate over the segments as if it was one piece of path. */ struct cwk_segment_joined { struct cwk_segment segment; const char **paths; size_t path_index; }; static size_t cwk_path_output_sized(char *buffer, size_t buffer_size, size_t position, const char *str, size_t length) { size_t amount_written; // First we determine the amount which we can write to the buffer. There are // three cases. In the first case we have enough to store the whole string in // it. In the second one we can only store a part of it, and in the third we // have no space left. if (buffer_size > position + length) { amount_written = length; } else if (buffer_size > position) { amount_written = buffer_size - position; } else { amount_written = 0; } // If we actually want to write out something we will do that here. We will // always append a '\0', this way we are guaranteed to have a valid string at // all times. if (amount_written > 0) { memmove(&buffer[position], str, amount_written); } // Return the theoretical length which would have been written when everything // would have fit in the buffer. return length; } static size_t cwk_path_output_current(char *buffer, size_t buffer_size, size_t position) { // We output a "current" directory, which is a single character. This // character is currently not style dependant. return cwk_path_output_sized(buffer, buffer_size, position, ".", 1); } static size_t cwk_path_output_back(char *buffer, size_t buffer_size, size_t position) { // We output a "back" directory, which ahs two characters. This // character is currently not style dependant. return cwk_path_output_sized(buffer, buffer_size, position, "..", 2); } static size_t cwk_path_output_separator(char *buffer, size_t buffer_size, size_t position) { // We output a separator, which is a single character. return cwk_path_output_sized(buffer, buffer_size, position, separators[path_style], 1); } static size_t cwk_path_output_dot(char *buffer, size_t buffer_size, size_t position) { // We output a dot, which is a single character. This is used for extensions. return cwk_path_output_sized(buffer, buffer_size, position, ".", 1); } static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position, const char *str) { size_t length; // This just does a sized output internally, but first measuring the // null-terminated string. length = strlen(str); return cwk_path_output_sized(buffer, buffer_size, position, str, length); } static void cwk_path_terminate_output(char *buffer, size_t buffer_size, size_t pos) { if (buffer_size > 0) { if (pos >= buffer_size) { buffer[buffer_size - 1] = '\0'; } else { buffer[pos] = '\0'; } } } static bool cwk_path_is_string_equal(const char *first, const char *second, size_t n) { // If the path style is UNIX, we will compare case sensitively. This can be // done easily using strncmp. if (path_style == CWK_STYLE_UNIX) { return strncmp(first, second, n) == 0; } // However, if this is windows we will have to compare case insensitively. // Since there is no standard method to do that we will have to do it on our // own. while (*first && *second && n > 0) { // We can consider the string to be not equal if the two lowercase // characters are not equal. if (tolower(*first++) != tolower(*second++)) { return false; } --n; } // We can consider the string to be equal if we either reached n == 0 or both // cursors point to a null character. return n == 0 || (*first == '\0' && *second == '\0'); } static const char *cwk_path_find_next_stop(const char *c) { // We just move forward until we find a '\0' or a separator, which will be our // next "stop". while (*c != '\0' && !cwk_path_is_separator(c)) { ++c; } // Return the pointer of the next stop. return c; } static const char *cwk_path_find_previous_stop(const char *begin, const char *c) { // We just move back until we find a separator or reach the beginning of the // path, which will be our previous "stop". while (c > begin && !cwk_path_is_separator(c)) { --c; } // Return the pointer to the previous stop. We have to return the first // character after the separator, not on the separator itself. if (cwk_path_is_separator(c)) { return c + 1; } else { return c; } } static bool cwk_path_get_first_segment_without_root(const char *path, const char *segments, struct cwk_segment *segment) { // Let's remember the path. We will move the path pointer afterwards, that's // why this has to be done first. segment->path = path; segment->segments = segments; segment->begin = segments; segment->end = segments; segment->size = 0; // Now let's check whether this is an empty string. An empty string has no // segment it could use. if (*segments == '\0') { return false; } // If the string starts with separators, we will jump over those. If there is // only a slash and a '\0' after it, we can't determine the first segment // since there is none. while (cwk_path_is_separator(segments)) { ++segments; if (*segments == '\0') { return false; } } // So this is the beginning of our segment. segment->begin = segments; // Now let's determine the end of the segment, which we do by moving the path // pointer further until we find a separator. segments = cwk_path_find_next_stop(segments); // And finally, calculate the size of the segment by subtracting the position // from the end. segment->size = (size_t)(segments - segment->begin); segment->end = segments; // Tell the caller that we found a segment. return true; } static bool cwk_path_get_last_segment_without_root(const char *path, struct cwk_segment *segment) { // Now this is fairly similar to the normal algorithm, however, it will assume // that there is no root in the path. So we grab the first segment at this // position, assuming there is no root. if (!cwk_path_get_first_segment_without_root(path, path, segment)) { return false; } // Now we find our last segment. The segment struct of the caller // will contain the last segment, since the function we call here will not // change the segment struct when it reaches the end. while (cwk_path_get_next_segment(segment)) { // We just loop until there is no other segment left. } return true; } static bool cwk_path_get_first_segment_joined(const char **paths, struct cwk_segment_joined *sj) { bool result; // Prepare the first segment. We position the joined segment on the first path // and assign the path array to the struct. sj->path_index = 0; sj->paths = paths; // We loop through all paths until we find one which has a segment. The result // is stored in a variable, so we can let the caller know whether we found one // or not. result = false; while (paths[sj->path_index] != NULL && (result = cwk_path_get_first_segment(paths[sj->path_index], &sj->segment)) == false) { ++sj->path_index; } return result; } static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj) { bool result; if (sj->paths[sj->path_index] == NULL) { // We reached already the end of all paths, so there is no other segment // left. return false; } else if (cwk_path_get_next_segment(&sj->segment)) { // There was another segment on the current path, so we are good to // continue. return true; } // We try to move to the next path which has a segment available. We must at // least move one further since the current path reached the end. result = false; do { ++sj->path_index; // And we obviously have to stop this loop if there are no more paths left. if (sj->paths[sj->path_index] == NULL) { break; } // Grab the first segment of the next path and determine whether this path // has anything useful in it. There is one more thing we have to consider // here - for the first time we do this we want to skip the root, but // afterwards we will consider that to be part of the segments. result = cwk_path_get_first_segment_without_root(sj->paths[sj->path_index], sj->paths[sj->path_index], &sj->segment); } while (!result); // Finally, report the result back to the caller. return result; } static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj) { bool result; if (*sj->paths == NULL) { // It's possible that there is no initialized segment available in the // struct since there are no paths. In that case we can return false, since // there is no previous segment. return false; } else if (cwk_path_get_previous_segment(&sj->segment)) { // Now we try to get the previous segment from the current path. If we can // do that successfully, we can let the caller know that we found one. return true; } result = false; do { // We are done once we reached index 0. In that case there are no more // segments left. if (sj->path_index == 0) { break; } // There is another path which we have to inspect. So we decrease the path // index. --sj->path_index; // If this is the first path we will have to consider that this path might // include a root, otherwise we just treat is as a segment. if (sj->path_index == 0) { result = cwk_path_get_last_segment(sj->paths[sj->path_index], &sj->segment); } else { result = cwk_path_get_last_segment_without_root(sj->paths[sj->path_index], &sj->segment); } } while (!result); return result; } static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj) { enum cwk_segment_type type; int counter; // We are handling back segments here. We must verify how many back segments // and how many normal segments come before this one to decide whether we keep // or remove it. // The counter determines how many normal segments are our current segment, // which will popped off before us. If the counter goes above zero it means // that our segment will be popped as well. counter = 0; // We loop over all previous segments until we either reach the beginning, // which means our segment will not be dropped or the counter goes above zero. while (cwk_path_get_previous_segment_joined(sj)) { // Now grab the type. The type determines whether we will increase or // decrease the counter. We don't handle a CWK_CURRENT frame here since it // has no influence. type = cwk_path_get_segment_type(&sj->segment); if (type == CWK_NORMAL) { // This is a normal segment. The normal segment will increase the counter // since it neutralizes one back segment. If we go above zero we can // return immediately. ++counter; if (counter > 0) { return true; } } else if (type == CWK_BACK) { // A CWK_BACK segment will reduce the counter by one. We can not remove a // back segment as long we are not above zero since we don't have the // opposite normal segment which we would remove. --counter; } } // We never got a count larger than zero, so we will keep this segment alive. return false; } static bool cwk_path_segment_normal_will_be_removed( struct cwk_segment_joined *sj) { enum cwk_segment_type type; int counter; // The counter determines how many segments are above our current segment, // which will popped off before us. If the counter goes below zero it means // that our segment will be popped as well. counter = 0; // We loop over all following segments until we either reach the end, which // means our segment will not be dropped or the counter goes below zero. while (cwk_path_get_next_segment_joined(sj)) { // First, grab the type. The type determines whether we will increase or // decrease the counter. We don't handle a CWK_CURRENT frame here since it // has no influence. type = cwk_path_get_segment_type(&sj->segment); if (type == CWK_NORMAL) { // This is a normal segment. The normal segment will increase the counter // since it will be removed by a "../" before us. ++counter; } else if (type == CWK_BACK) { // A CWK_BACK segment will reduce the counter by one. If we are below zero // we can return immediately. --counter; if (counter < 0) { return true; } } } // We never got a negative count, so we will keep this segment alive. return false; } static bool cwk_path_segment_will_be_removed(const struct cwk_segment_joined *sj, bool absolute) { enum cwk_segment_type type; struct cwk_segment_joined sjc; // We copy the joined path so we don't need to modify it. sjc = *sj; // First we check whether this is a CWK_CURRENT or CWK_BACK segment, since // those will always be dropped. type = cwk_path_get_segment_type(&sj->segment); if (type == CWK_CURRENT) { return true; } else if (type == CWK_BACK && absolute) { return true; } else if (type == CWK_BACK) { return cwk_path_segment_back_will_be_removed(&sjc); } else { return cwk_path_segment_normal_will_be_removed(&sjc); } } static bool cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined *sj, bool absolute) { while (cwk_path_segment_will_be_removed(sj, absolute)) { if (!cwk_path_get_next_segment_joined(sj)) { return false; } } return true; } static void cwk_path_get_root_windows(const char *path, size_t *length) { const char *c; bool is_device_path; // A device path is a path which starts with "\\." or "\\?". A device path can // be a UNC path as well, in which case it will take up one more segment. is_device_path = false; // We can not determine the root if this is an empty string. So we set the // root to NULL and the length to zero and cancel the whole thing. c = path; *length = 0; if (!*c) { return; } // Now we have to verify whether this is a windows network path (UNC), which // we will consider our root. if (cwk_path_is_separator(c)) { ++c; // Check whether the path starts with a single back slash, which means this // is not a network path - just a normal path starting with a backslash. if (!cwk_path_is_separator(c)) { // Okay, this is not a network path but we still use the backslash as a // root. ++(*length); return; } // Yes, this is a network or device path. Skip the previous separator. Now // we need to determine whether this is a device path. We might advance one // character here if the server name starts with a '?' or a '.', but that's // fine since we will search for a separator afterwards anyway. ++c; is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c); if (is_device_path) { // That's a device path, and the root must be either "\\.\" or "\\?\" // which is 4 characters long. (at least that's how Windows // GetFullPathName behaves.) *length = 4; return; } // We will grab anything up to the next stop. The next stop might be a '\0' // or another separator. That will be the server name. c = cwk_path_find_next_stop(c); // If this is a separator and not the end of a string we wil have to include // it. However, if this is a '\0' we must not skip it. while (cwk_path_is_separator(c)) { ++c; } // We are now skipping the shared folder name, which will end after the // next stop. c = cwk_path_find_next_stop(c); // Then there might be a separator at the end. We will include that as well, // it will mark the path as absolute. if (cwk_path_is_separator(c)) { ++c; } // Finally, calculate the size of the root. *length = (size_t)(c - path); return; } // Move to the next and check whether this is a colon. if (*++c == ':') { *length = 2; // Now check whether this is a backslash (or slash). If it is not, we could // assume that the next character is a '\0' if it is a valid path. However, // we will not assume that - since ':' is not valid in a path it must be a // mistake by the caller than. We will try to understand it anyway. if (cwk_path_is_separator(++c)) { *length = 3; } } } static void cwk_path_get_root_unix(const char *path, size_t *length) { // The slash of the unix path represents the root. There is no root if there // is no slash. if (cwk_path_is_separator(path)) { *length = 1; } else { *length = 0; } } static bool cwk_path_is_root_absolute(const char *path, size_t length) { // This is definitely not absolute if there is no root. if (length == 0) { return false; } // If there is a separator at the end of the root, we can safely consider this // to be an absolute path. return cwk_path_is_separator(&path[length - 1]); } static size_t cwk_path_join_and_normalize_multiple(const char **paths, char *buffer, size_t buffer_size) { size_t pos; bool absolute, has_segment_output; struct cwk_segment_joined sj; // We initialize the position after the root, which should get us started. cwk_path_get_root(paths[0], &pos); // Determine whether the path is absolute or not. We need that to determine // later on whether we can remove superfluous "../" or not. absolute = cwk_path_is_root_absolute(paths[0], pos); // First copy the root to the output. We will not modify the root. cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos); // So we just grab the first segment. If there is no segment we will always // output a "/", since we currently only support absolute paths here. if (!cwk_path_get_first_segment_joined(paths, &sj)) { goto done; } // Let's assume that we don't have any segment output for now. We will toggle // this flag once there is some output. has_segment_output = false; do { // Check whether we have to drop this segment because of resolving a // relative path or because it is a CWK_CURRENT segment. if (cwk_path_segment_will_be_removed(&sj, absolute)) { continue; } // We add a separator if we previously wrote a segment. The last segment // must not have a trailing separator. This must happen before the segment // output, since we would override the null terminating character with // reused buffers if this was done afterwards. if (has_segment_output) { pos += cwk_path_output_separator(buffer, buffer_size, pos); } // Remember that we have segment output, so we can handle the trailing slash // later on. This is necessary since we might have segments but they are all // removed. has_segment_output = true; // Write out the segment but keep in mind that we need to follow the // buffer size limitations. That's why we use the path output functions // here. pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin, sj.segment.size); } while (cwk_path_get_next_segment_joined(&sj)); // Remove the trailing slash, but only if we have segment output. We don't // want to remove anything from the root. if (!has_segment_output && pos == 0) { // This may happen if the path is absolute and all segments have been // removed. We can not have an empty output - and empty output means we stay // in the current directory. So we will output a ".". assert(absolute == false); pos += cwk_path_output_current(buffer, buffer_size, pos); } // We must append a '\0' in any case, unless the buffer size is zero. If the // buffer size is zero, which means we can not. done: cwk_path_terminate_output(buffer, buffer_size, pos); // And finally let our caller know about the total size of the normalized // path. return pos; } size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer, size_t buffer_size) { size_t i; const char *paths[4]; // The basename should be an absolute path if the caller is using the API // correctly. However, he might not and in that case we will append a fake // root at the beginning. if (cwk_path_is_absolute(base)) { i = 0; } else if (path_style == CWK_STYLE_WINDOWS) { paths[0] = "\\"; i = 1; } else { paths[0] = "/"; i = 1; } if (cwk_path_is_absolute(path)) { // If the submitted path is not relative the base path becomes irrelevant. // We will only normalize the submitted path instead. paths[i++] = path; paths[i] = NULL; } else { // Otherwise we append the relative path to the base path and normalize it. // The result will be a new absolute path. paths[i++] = base; paths[i++] = path; paths[i] = NULL; } // Finally join everything together and normalize it. return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size); } static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj, struct cwk_segment_joined *osj, bool absolute, bool *base_available, bool *other_available) { // Now looping over all segments until they start to diverge. A path may // diverge if two segments are not equal or if one path reaches the end. do { // Check whether there is anything available after we skip everything which // is invisible. We do that for both paths, since we want to let the caller // know which path has some trailing segments after they diverge. *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute); *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute); // We are done if one or both of those paths reached the end. They either // diverge or both reached the end - but in both cases we can not continue // here. if (!*base_available || !*other_available) { break; } // Compare the content of both segments. We are done if they are not equal, // since they diverge. if (!cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin, bsj->segment.size)) { break; } // We keep going until one of those segments reached the end. The next // segment might be invisible, but we will check for that in the beginning // of the loop once again. *base_available = cwk_path_get_next_segment_joined(bsj); *other_available = cwk_path_get_next_segment_joined(osj); } while (*base_available && *other_available); } size_t cwk_path_get_relative(const char *base_directory, const char *path, char *buffer, size_t buffer_size) { size_t pos, base_root_length, path_root_length; bool absolute, base_available, other_available, has_output; const char *base_paths[2], *other_paths[2]; struct cwk_segment_joined bsj, osj; pos = 0; // First we compare the roots of those two paths. If the roots are not equal // we can't continue, since there is no way to get a relative path from // different roots. cwk_path_get_root(base_directory, &base_root_length); cwk_path_get_root(path, &path_root_length); if (base_root_length != path_root_length || !cwk_path_is_string_equal(base_directory, path, base_root_length)) { cwk_path_terminate_output(buffer, buffer_size, pos); return pos; } // Verify whether this is an absolute path. We need to know that since we can // remove all back-segments if it is. absolute = cwk_path_is_root_absolute(base_directory, base_root_length); // Initialize our joined segments. This will allow us to use the internal // functions to skip until diverge and invisible. We only have one path in // them though. base_paths[0] = base_directory; base_paths[1] = NULL; other_paths[0] = path; other_paths[1] = NULL; cwk_path_get_first_segment_joined(base_paths, &bsj); cwk_path_get_first_segment_joined(other_paths, &osj); // Okay, now we skip until the segments diverge. We don't have anything to do // with the segments which are equal. cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available, &other_available); // Assume there is no output until we have got some. We will need this // information later on to remove trailing slashes or alternatively output a // current-segment. has_output = false; // So if we still have some segments left in the base path we will now output // a back segment for all of them. if (base_available) { do { // Skip any invisible segment. We don't care about those and we don't need // to navigate back because of them. if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) { break; } // Toggle the flag if we have output. We need to remember that, since we // want to remove the trailing slash. has_output = true; // Output the back segment and a separator. No need to worry about the // superfluous segment since it will be removed later on. pos += cwk_path_output_back(buffer, buffer_size, pos); pos += cwk_path_output_separator(buffer, buffer_size, pos); } while (cwk_path_get_next_segment_joined(&bsj)); } // And if we have some segments available of the target path we will output // all of those. if (other_available) { do { // Again, skip any invisible segments since we don't need to navigate into // them. if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) { break; } // Toggle the flag if we have output. We need to remember that, since we // want to remove the trailing slash. has_output = true; // Output the current segment and a separator. No need to worry about the // superfluous segment since it will be removed later on. pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin, osj.segment.size); pos += cwk_path_output_separator(buffer, buffer_size, pos); } while (cwk_path_get_next_segment_joined(&osj)); } // If we have some output by now we will have to remove the trailing slash. We // simply do that by moving back one character. The terminate output function // will then place the '\0' on this position. Otherwise, if there is no // output, we will have to output a "current directory", since the target path // points to the base path. if (has_output) { --pos; } else { pos += cwk_path_output_current(buffer, buffer_size, pos); } // Finally, we can terminate the output - which means we place a '\0' at the // current position or at the end of the buffer. cwk_path_terminate_output(buffer, buffer_size, pos); return pos; } size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer, size_t buffer_size) { const char *paths[3]; // This is simple. We will just create an array with the two paths which we // wish to join. paths[0] = path_a; paths[1] = path_b; paths[2] = NULL; // And then call the join and normalize function which will do the hard work // for us. return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size); } size_t cwk_path_join_multiple(const char **paths, char *buffer, size_t buffer_size) { // We can just call the internal join and normalize function for this one, // since it will handle everything. return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size); } void cwk_path_get_root(const char *path, size_t *length) { // We use a different implementation here based on the configuration of the // library. if (path_style == CWK_STYLE_WINDOWS) { cwk_path_get_root_windows(path, length); } else { cwk_path_get_root_unix(path, length); } } size_t cwk_path_change_root(const char *path, const char *new_root, char *buffer, size_t buffer_size) { const char *tail; size_t root_length, path_length, tail_length, new_root_length, new_path_size; // First we need to determine the actual size of the root which we will // change. cwk_path_get_root(path, &root_length); // Now we determine the sizes of the new root and the path. We need that to // determine the size of the part after the root (the tail). new_root_length = strlen(new_root); path_length = strlen(path); // Okay, now we calculate the position of the tail and the length of it. tail = path + root_length; tail_length = path_length - root_length; // We first output the tail and then the new root, that's because the source // path and the buffer may be overlapping. This way the root will not // overwrite the tail. cwk_path_output_sized(buffer, buffer_size, new_root_length, tail, tail_length); cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length); // Finally we calculate the size o the new path and terminate the output with // a '\0'. new_path_size = tail_length + new_root_length; cwk_path_terminate_output(buffer, buffer_size, new_path_size); return new_path_size; } bool cwk_path_is_absolute(const char *path) { size_t length; // We grab the root of the path. This root does not include the first // separator of a path. cwk_path_get_root(path, &length); // Now we can determine whether the root is absolute or not. return cwk_path_is_root_absolute(path, length); } bool cwk_path_is_relative(const char *path) { // The path is relative if it is not absolute. return !cwk_path_is_absolute(path); } void cwk_path_get_basename(const char *path, const char **basename, size_t *length) { struct cwk_segment segment; // We get the last segment of the path. The last segment will contain the // basename if there is any. If there are no segments we will set the basename // to NULL and the length to 0. if (!cwk_path_get_last_segment(path, &segment)) { *basename = NULL; *length = 0; return; } // Now we can just output the segment contents, since that's our basename. // There might be trailing separators after the basename, but the size does // not include those. *basename = segment.begin; *length = segment.size; } size_t cwk_path_change_basename(const char *path, const char *new_basename, char *buffer, size_t buffer_size) { struct cwk_segment segment; size_t pos, root_size, new_basename_size; // First we try to get the last segment. We may only have a root without any // segments, in which case we will create one. if (!cwk_path_get_last_segment(path, &segment)) { // So there is no segment in this path. First we grab the root and output // that. We are not going to modify the root in any way. cwk_path_get_root(path, &root_size); pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size); // We have to trim the separators from the beginning of the new basename. // This is quite easy to do. while (cwk_path_is_separator(new_basename)) { ++new_basename; } // Now we measure the length of the new basename, this is a two step // process. First we find the '\0' character at the end of the string. new_basename_size = 0; while (new_basename[new_basename_size]) { ++new_basename_size; } // And then we trim the separators at the end of the basename until we reach // the first valid character. while (new_basename_size > 0 && cwk_path_is_separator(&new_basename[new_basename_size - 1])) { --new_basename_size; } // Now we will output the new basename after the root. pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename, new_basename_size); // And finally terminate the output and return the total size of the path. cwk_path_terminate_output(buffer, buffer_size, pos); return pos; } // If there is a last segment we can just forward this call, which is fairly // easy. return cwk_path_change_segment(&segment, new_basename, buffer, buffer_size); } void cwk_path_get_dirname(const char *path, size_t *length) { struct cwk_segment segment; // We get the last segment of the path. The last segment will contain the // basename if there is any. If there are no segments we will set the length // to 0. if (!cwk_path_get_last_segment(path, &segment)) { *length = 0; return; } // We can now return the length from the beginning of the string up to the // beginning of the last segment. *length = (size_t)(segment.begin - path); } bool cwk_path_get_extension(const char *path, const char **extension, size_t *length) { struct cwk_segment segment; const char *c; // We get the last segment of the path. The last segment will contain the // extension if there is any. if (!cwk_path_get_last_segment(path, &segment)) { return false; } // Now we search for a dot within the segment. If there is a dot, we consider // the rest of the segment the extension. We do this from the end towards the // beginning, since we want to find the last dot. for (c = segment.end; c >= segment.begin; --c) { if (*c == '.') { // Okay, we found an extension. We can stop looking now. *extension = c; *length = (size_t)(segment.end - c); return true; } } // We couldn't find any extension. return false; } bool cwk_path_has_extension(const char *path) { const char *extension; size_t length; // We just wrap the get_extension call which will then do the work for us. return cwk_path_get_extension(path, &extension, &length); } size_t cwk_path_change_extension(const char *path, const char *new_extension, char *buffer, size_t buffer_size) { struct cwk_segment segment; const char *c, *old_extension; size_t pos, root_size, trail_size, new_extension_size; // First we try to get the last segment. We may only have a root without any // segments, in which case we will create one. if (!cwk_path_get_last_segment(path, &segment)) { // So there is no segment in this path. First we grab the root and output // that. We are not going to modify the root in any way. If there is no // root, this will end up with a root size 0, and nothing will be written. cwk_path_get_root(path, &root_size); pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size); // Add a dot if the submitted value doesn't have any. if (*new_extension != '.') { pos += cwk_path_output_dot(buffer, buffer_size, pos); } // And finally terminate the output and return the total size of the path. pos += cwk_path_output(buffer, buffer_size, pos, new_extension); cwk_path_terminate_output(buffer, buffer_size, pos); return pos; } // Now we seek the old extension in the last segment, which we will replace // with the new one. If there is no old extension, it will point to the end of // the segment. old_extension = segment.end; for (c = segment.begin; c < segment.end; ++c) { if (*c == '.') { old_extension = c; } } pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path, (size_t)(old_extension - segment.path)); // If the new extension starts with a dot, we will skip that dot. We always // output exactly one dot before the extension. If the extension contains // multiple dots, we will output those as part of the extension. if (*new_extension == '.') { ++new_extension; } // We calculate the size of the new extension, including the dot, in order to // output the trail - which is any part of the path coming after the // extension. We must output this first, since the buffer may overlap with the // submitted path - and it would be overridden by longer extensions. new_extension_size = strlen(new_extension) + 1; trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size, segment.end); // Finally we output the dot and the new extension. The new extension itself // doesn't contain the dot anymore, so we must output that first. pos += cwk_path_output_dot(buffer, buffer_size, pos); pos += cwk_path_output(buffer, buffer_size, pos, new_extension); // Now we terminate the output with a null-terminating character, but before // we do that we must add the size of the trail to the position which we // output before. pos += trail_size; cwk_path_terminate_output(buffer, buffer_size, pos); // And the position is our output size now. return pos; } size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size) { const char *paths[2]; // Now we initialize the paths which we will normalize. Since this function // only supports submitting a single path, we will only add that one. paths[0] = path; paths[1] = NULL; return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size); } size_t cwk_path_get_intersection(const char *path_base, const char *path_other) { bool absolute; size_t base_root_length, other_root_length; const char *end; const char *paths_base[2], *paths_other[2]; struct cwk_segment_joined base, other; // We first compare the two roots. We just return zero if they are not equal. // This will also happen to return zero if the paths are mixed relative and // absolute. cwk_path_get_root(path_base, &base_root_length); cwk_path_get_root(path_other, &other_root_length); if (!cwk_path_is_string_equal(path_base, path_other, base_root_length)) { return 0; } // Configure our paths. We just have a single path in here for now. paths_base[0] = path_base; paths_base[1] = NULL; paths_other[0] = path_other; paths_other[1] = NULL; // So we get the first segment of both paths. If one of those paths don't have // any segment, we will return 0. if (!cwk_path_get_first_segment_joined(paths_base, &base) || !cwk_path_get_first_segment_joined(paths_other, &other)) { return base_root_length; } // We now determine whether the path is absolute or not. This is required // because if will ignore removed segments, and this behaves differently if // the path is absolute. However, we only need to check the base path because // we are guaranteed that both paths are either relative or absolute. absolute = cwk_path_is_root_absolute(path_base, base_root_length); // We must keep track of the end of the previous segment. Initially, this is // set to the beginning of the path. This means that 0 is returned if the // first segment is not equal. end = path_base + base_root_length; // Now we loop over both segments until one of them reaches the end or their // contents are not equal. do { // We skip all segments which will be removed in each path, since we want to // know about the true path. if (!cwk_path_segment_joined_skip_invisible(&base, absolute) || !cwk_path_segment_joined_skip_invisible(&other, absolute)) { break; } if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin, base.segment.size)) { // So the content of those two segments are not equal. We will return the // size up to the beginning. return (size_t)(end - path_base); } // Remember the end of the previous segment before we go to the next one. end = base.segment.end; } while (cwk_path_get_next_segment_joined(&base) && cwk_path_get_next_segment_joined(&other)); // Now we calculate the length up to the last point where our paths pointed to // the same place. return (size_t)(end - path_base); } bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment) { size_t length; const char *segments; // We skip the root since that's not part of the first segment. The root is // treated as a separate entity. cwk_path_get_root(path, &length); segments = path + length; // Now, after we skipped the root we can continue and find the actual segment // content. return cwk_path_get_first_segment_without_root(path, segments, segment); } bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment) { // We first grab the first segment. This might be our last segment as well, // but we don't know yet. There is no last segment if there is no first // segment, so we return false in that case. if (!cwk_path_get_first_segment(path, segment)) { return false; } // Now we find our last segment. The segment struct of the caller // will contain the last segment, since the function we call here will not // change the segment struct when it reaches the end. while (cwk_path_get_next_segment(segment)) { // We just loop until there is no other segment left. } return true; } bool cwk_path_get_next_segment(struct cwk_segment *segment) { const char *c; // First we jump to the end of the previous segment. The first character must // be either a '\0' or a separator. c = segment->begin + segment->size; if (*c == '\0') { return false; } // Now we skip all separator until we reach something else. We are not yet // guaranteed to have a segment, since the string could just end afterwards. assert(cwk_path_is_separator(c)); do { ++c; } while (cwk_path_is_separator(c)); // If the string ends here, we can safely assume that there is no other // segment after this one. if (*c == '\0') { return false; } // Now we are safe to assume there is a segment. We store the beginning of // this segment in the segment struct of the caller. segment->begin = c; // And now determine the size of this segment, and store it in the struct of // the caller as well. c = cwk_path_find_next_stop(c); segment->end = c; segment->size = (size_t)(c - segment->begin); // Tell the caller that we found a segment. return true; } bool cwk_path_get_previous_segment(struct cwk_segment *segment) { const char *c; // The current position might point to the first character of the path, which // means there are no previous segments available. c = segment->begin; if (c <= segment->segments) { return false; } // We move towards the beginning of the path until we either reached the // beginning or the character is no separator anymore. do { --c; if (c < segment->segments) { // So we reached the beginning here and there is no segment. So we return // false and don't change the segment structure submitted by the caller. return false; } } while (cwk_path_is_separator(c)); // We are guaranteed now that there is another segment, since we moved before // the previous separator and did not reach the segment path beginning. segment->end = c + 1; segment->begin = cwk_path_find_previous_stop(segment->segments, c); segment->size = (size_t)(segment->end - segment->begin); return true; } enum cwk_segment_type cwk_path_get_segment_type( const struct cwk_segment *segment) { // We just make a string comparison with the segment contents and return the // appropriate type. if (strncmp(segment->begin, ".", segment->size) == 0) { return CWK_CURRENT; } else if (strncmp(segment->begin, "..", segment->size) == 0) { return CWK_BACK; } return CWK_NORMAL; } bool cwk_path_is_separator(const char *str) { const char *c; // We loop over all characters in the read symbols. c = separators[path_style]; while (*c) { if (*c == *str) { return true; } ++c; } return false; } size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value, char *buffer, size_t buffer_size) { size_t pos, value_size, tail_size; // First we have to output the head, which is the whole string up to the // beginning of the segment. This part of the path will just stay the same. pos = cwk_path_output_sized(buffer, buffer_size, 0, segment->path, (size_t)(segment->begin - segment->path)); // In order to trip the submitted value, we will skip any separator at the // beginning of it and behave as if it was never there. while (cwk_path_is_separator(value)) { ++value; } // Now we determine the length of the value. In order to do that we first // locate the '\0'. value_size = 0; while (value[value_size]) { ++value_size; } // Since we trim separators at the beginning and in the end of the value we // have to subtract from the size until there are either no more characters // left or the last character is no separator. while (value_size > 0 && cwk_path_is_separator(&value[value_size - 1])) { --value_size; } // We also have to determine the tail size, which is the part of the string // following the current segment. This part will not change. tail_size = strlen(segment->end); // Now we output the tail. We have to do that, because if the buffer and the // source are overlapping we would override the tail if the value is // increasing in length. cwk_path_output_sized(buffer, buffer_size, pos + value_size, segment->end, tail_size); // Finally we can output the value in the middle of the head and the tail, // where we have enough space to fit the whole trimmed value. pos += cwk_path_output_sized(buffer, buffer_size, pos, value, value_size); // Now we add the tail size to the current position and terminate the output - // basically, ensure that there is a '\0' at the end of the buffer. pos += tail_size; cwk_path_terminate_output(buffer, buffer_size, pos); // And now tell the caller how long the whole path would be. return pos; } enum cwk_path_style cwk_path_guess_style(const char *path) { const char *c; size_t root_length; struct cwk_segment segment; // First we determine the root. Only windows roots can be longer than a single // slash, so if we can determine that it starts with something like "C:", we // know that this is a windows path. cwk_path_get_root_windows(path, &root_length); if (root_length > 1) { return CWK_STYLE_WINDOWS; } // Next we check for slashes. Windows uses backslashes, while unix uses // forward slashes. Windows actually supports both, but our best guess is to // assume windows with backslashes and unix with forward slashes. for (c = path; *c; ++c) { if (*c == *separators[CWK_STYLE_UNIX]) { return CWK_STYLE_UNIX; } else if (*c == *separators[CWK_STYLE_WINDOWS]) { return CWK_STYLE_WINDOWS; } } // This path does not have any slashes. We grab the last segment (which // actually must be the first one), and determine whether the segment starts // with a dot. A dot is a hidden folder or file in the UNIX world, in that // case we assume the path to have UNIX style. if (!cwk_path_get_last_segment(path, &segment)) { // We couldn't find any segments, so we default to a UNIX path style since // there is no way to make any assumptions. return CWK_STYLE_UNIX; } if (*segment.begin == '.') { return CWK_STYLE_UNIX; } // And finally we check whether the last segment contains a dot. If it // contains a dot, that might be an extension. Windows is more likely to have // file names with extensions, so our guess would be windows. for (c = segment.begin; *c; ++c) { if (*c == '.') { return CWK_STYLE_WINDOWS; } } // All our checks failed, so we will return a default value which is currently // UNIX. return CWK_STYLE_UNIX; } void cwk_path_set_style(enum cwk_path_style style) { // We can just set the global path style variable and then the behaviour for // all functions will change accordingly. assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS); path_style = style; } enum cwk_path_style cwk_path_get_style(void) { // Simply return the path style which we store in a global variable. return path_style; }