返回顶部

收藏

实用的C语言字符串分割函数strsplit

更多

这是我写的一个字符串分割函数,可以根据提供的分隔符串列表将指定的字符串分割为若干个字符串。分隔符可以是单个字符,也可以是字符串;可以设定是否压缩分隔符串(即当两个或者两个以上分隔符串连续出现时不生成空串),也可以设定是否把查找到的分隔符串也插入到结果中。需要注意的是,结果是动态分配的内存,使用完毕后需要自己释放。详情请看函数头部注释,代码中有使用的示例。

在 gcc 和 vs2008 下均通过测试

接口:

int strsplit(char ***dest, int *count, char *s_str, char **separator, int number_separators, int compress_separator, int keep_separator);

参数说明:

@Param dest 输出参数,保存字符串分割的结果,是指向字符串数组的指针,采用了动态内存分配,使用完毕需自己释放。

@Param count 输出参数,保存成功被分割出的字符串个数。

@Param s_str 输入参数,需要被分割的源字符串。

@Param separator 输入参数,字符串数组,分割用的分隔符串列表

@Param number_separators 输入参数,分隔符串列表中分隔符的个数

@Param compress_separator 输入参数,是否压缩分隔符,即当源串中连续出现两个或者两个以上的分隔符时不生成中间的空串。

@Param keep_separator 输入参数,是否把源串中查找到的分隔符也作为一个字符串存入结果中。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/**
 * Split a string into substrings according to a list of separator strings.
 *
 * At every position the separator that occurs earliest in the remaining text
 * is used; when several separators start at the same position, the one listed
 * first in 'separator' wins.
 *
 * @Param dest                 out: receives a heap-allocated array of heap-allocated
 *                                  strings. The caller must free() every element and
 *                                  then the array itself. Set to NULL on failure or
 *                                  when nothing was produced.
 * @Param count                out: number of strings stored in *dest (0 on failure).
 * @Param s_str                in:  the string to split; must be non-NULL and non-empty.
 * @Param separator            in:  array of separator strings; every entry must be
 *                                  non-NULL and non-empty.
 * @Param number_separators    in:  number of entries in 'separator'; must be > 0.
 * @Param compress_separator   in:  if > 0, no empty string is produced where a separator
 *                                  starts the input or directly follows another separator;
 *                                  if == 0, an empty string is produced in those places.
 *                                  A separator at the very end of the input never
 *                                  produces a trailing empty string.
 * @Param keep_separator       in:  if > 0, every separator that was found is also stored
 *                                  in the result, right after the token it terminates.
 *
 * @Return 0 on success, -1 on invalid parameters, -2 on allocation failure
 *         (everything allocated so far is released before returning).
 */

/* Release the first n strings of items and then the array itself. */
static void strsplit_free_items(char **items, size_t n)
{
    size_t k;

    if (items == NULL)
        return;

    for (k = 0; k < n; k++)
        free(items[k]);
    free(items);
}

/* Return a new NUL-terminated copy of the len bytes at start, or NULL if out of memory. */
static char *strsplit_copy_range(const char *start, size_t len)
{
    char *copy = (char *)malloc(len + 1);

    if (copy == NULL)
        return NULL;

    memcpy(copy, start, len);
    copy[len] = '\0';
    return copy;
}

int strsplit(char ***dest, int *count, char *s_str, char **separator, int number_separators, int compress_separator, int keep_separator)
{
    char **result = NULL;
    size_t size = 0;
    char *look_ahead = NULL;
    int i = 0;

    /* Give the outputs a defined value first, but only through valid pointers. */
    if (count != NULL)
        *count = 0;
    if (dest != NULL)
        *dest = NULL;

    /* check parameters */
    if (
        dest == NULL
        || count == NULL
        || s_str == NULL || *s_str == '\0'
        || separator == NULL
        || number_separators <= 0
        || compress_separator < 0
        || keep_separator < 0
        )
        return -1;

    for (i = 0; i < number_separators; i++)
        if (separator[i] == NULL || *separator[i] == '\0')
            return -1;

    look_ahead = s_str;
    while (*look_ahead != '\0')
    {
        /* Default: no separator ahead, so the token runs to the end of the input. */
        char *sep_start = look_ahead + strlen(look_ahead);
        char *sep_end = sep_start;
        int found_separator = 0;
        int emit_token = 0;
        int emit_separator = 0;
        size_t token_len = 0;
        size_t needed = 0;

        /* find the separator that starts closest to look_ahead. */
        for (i = 0; i < number_separators; i++)
        {
            char *hit = strstr(look_ahead, separator[i]);
            if (hit == NULL)
                continue;

            found_separator = 1;
            /* strict '<' keeps the earlier-listed separator on a tie. */
            if (hit < sep_start)
            {
                sep_start = hit;
                sep_end = hit + strlen(separator[i]);
            }
        }

        token_len = (size_t)(sep_start - look_ahead);

        /* An empty token is only stored when separators are not compressed. */
        emit_token = (token_len > 0 || compress_separator == 0) ? 1 : 0;
        /* Only store a separator that really exists; the tail of the input has none. */
        emit_separator = (keep_separator > 0 && found_separator) ? 1 : 0;
        needed = (size_t)emit_token + (size_t)emit_separator;

        if (needed > 0)
        {
            /* Grow via a temporary so 'result' stays valid if realloc fails. */
            char **grown = (char **)realloc(result, (size + needed) * sizeof(char *));
            if (grown == NULL)
            {
                strsplit_free_items(result, size);
                return -2;
            }
            result = grown;

            if (emit_token)
            {
                result[size] = strsplit_copy_range(look_ahead, token_len);
                if (result[size] == NULL)
                {
                    strsplit_free_items(result, size);
                    return -2;
                }
                size++;
            }

            if (emit_separator)
            {
                result[size] = strsplit_copy_range(sep_start, (size_t)(sep_end - sep_start));
                if (result[size] == NULL)
                {
                    strsplit_free_items(result, size);
                    return -2;
                }
                size++;
            }
        }

        /* Separators are non-empty, so this always advances unless we hit the end. */
        look_ahead = sep_end;
    }

    *dest = result;
    *count = (int)size;

    return 0;
}

/**
 * Demo: split a sample string on the separators "ab" and "ba" with separator
 * compression and separator keeping enabled, print one result per line, then
 * release everything strsplit allocated.
 *
 * Returns 0 on success and EXIT_FAILURE if strsplit reports an error.
 */
int main(int argc, char *argv[])
{
    char *separator[] = {"ab", "ba"};
    char *str = "abbabababbaababaaab";
    char **result = NULL;
    int n_str = 0;
    int status = 0;
    int i = 0;

    /* command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    status = strsplit(&result, &n_str, str, separator, 2, 1, 1);
    if (status != 0)
    {
        fprintf(stderr, "strsplit failed with code %d\n", status);
        return EXIT_FAILURE;
    }

    for (i = 0; i < n_str; i++)
        printf("%s\n", result[i]);

    /* the caller owns both the strings and the array holding them. */
    for (i = 0; i < n_str; i++)
        free(result[i]);
    free(result);
    return 0;
}
//该片段来自于http://outofmemory.cn

标签:c++,基础

收藏

0人收藏

支持

0

反对

0

»更多 您可能感兴趣的代码
  1. 2012-11-05 21:59:42java获得随机数代码 by 怪兽狂殴奥特曼
  2. 2014-11-04 13:01:18获取char*的长度 by 千万不要郁闷
  3. 2014-11-08 09:58:58打印日历 by aiheng1988
  4. 2014-12-07 12:15:24拉格朗日插值法求某一处的函数值 by qqmmcc
  5. 2012-12-18 14:03:31C++实现日期相加 by zoufei
  6. 2014-05-13 15:32:20生成不重复的随机数 by 灵剑子
  7. 2014-05-16 16:10:51通讯录管理系统 by aiheng1988
  8. 2014-05-17 16:21:16使用C++TR1实现物流配送的简单模拟 by 童学芬
  9. 2014-05-17 21:55:43计算器 by qqmmcc
  10. 2014-05-20 13:58:13基本的随机数的产生和排序 by qqmmcc
  11. 2014-05-21 14:30:44够2的程序 by 小项