排序算法的时间复杂度与数据量关系探究实验_C

本文为工11姚彦孜（学号：2021010607）数据结构课程的第一次作业，个人认为十分有价值，值得深入探讨，同时由于CSDN独特的代码段编辑功能，所以我把第一次作业发表在这里，希望在自己受益的同时能对大家有所帮助。

实验目的

我的mergesort排序在自己的VS上总是能够成功运行，但是在oj的提交平台上提交却经常出现问题，因此我希望通过自己进行不同大小数据集的模拟，来验证自己的代码是哪里出现了问题。

（一些心路历程：开始使用vector容器进行代码编写，出现了小数accepted，大数runtime error的情况，经助教提醒后修改为数组类型，但出现了大数accepted，小数wrong answer的情况，后改为用C语言编写，不知道为什么就成功了，在这里想探究一下原因）

C语言写出的mergesort运行时间

selectionsort运行时间

问题概述

1、测试数据生成：随机生成不同规模的测试数据，数据类型采用int类型，数据规模为10，10^2，10^3，10^4，10^5个数字.

2、实现SelectionSort和MergeSort，测试SelectionSort和MergeSort在不同规模数据上的运行时间。

3、用python绘制线图展示:不同排序算法在不同规模数据集上的运行时间

对比两种解决Selection问题的方法所需的时间。

测试数据生成

void Random(int* a, int n, int l, int r)//生成范围在l~r的随机数 
{
	srand(time(0));  //设置时间种子
	for (int i = 0; i < n; i++) {
		a[i] = rand() % (r - l + 1) + l;//生成区间r~l的随机数 
	}
}

实现SelectionSort、MergeSort并测试运行时间

由于我在oj上提交的是C版本，在这里附上C++版本，后续的检验也是使用我的C++版本检验的

#include 
using namespace std;
void mergesort(int *arr, int n, int a, int b)
{
	//传入数组，数组长度,起始序号和终止序号
	if (a >= b) return;
	int mid = a + (b - a) / 2;//防止溢出
	mergesort(arr, n, a, mid);
	mergesort(arr, n, mid + 1, b);//递归调用，持续二分
	int *tmp = new int[n];
	//对当前数组进行排序
	int i = a, j = mid + 1, size = a;
	while (i <= mid && j <= b)
	{
		if (arr[i] <= arr[j])
		{
			tmp[size] = arr[i];
			i++;
		}
		else
		{
			tmp[size] = arr[j];
			j++;
		}
		size++;
	}
	for (int k = i; k <= mid; ++k) {
		tmp[size++] = arr[k];
	}//如果左边没越界，那就把左边剩下的东西拷贝到tmp里去
	for (int k = j; k <= b; ++k) {
		tmp[size++] = arr[k];
	}//右边同理
	for (int k = a; k <= b; ++k)
	{
		arr[k] = tmp[k];
	}
}
int main()
{
	int n;
	cin >> n;
	int *nums = new int[n];
	for (int x = 0; x < n; x++)
	{
		cin >> nums[x];
	}
	mergesort(nums,n, 0, n - 1);
	for (int m = 0; m < n; m++)
	{
		cout << nums[m];
		cout << " ";
	}
	return 0;
}

1、用C++中的测试时间函数测试两个代码段

测试selectionsort

#include 
#include  
#include  // 时间相关的头文件
using namespace std;

void Random(int* a, int n, int l, int r)//生成范围在l~r的随机数 
{
	srand(time(0));  //设置时间种子
	for (int i = 0; i < n; i++) {
		a[i] = rand() % (r - l + 1) + l;//生成区间r~l的随机数 
	}
}

void selection(int* nums, int n)
{
	// selectSort 选择排序
	//遍历数组，找到最大的数，直到所有大数都挑选完
	int minIndex;//定义一个最小值所在的数组下标
	for (int i = 0; i < n - 1; ++i) {
		minIndex = i;
		for (int j = i + 1; j < n; ++j)
		{
			if (nums[j] < nums[minIndex])
			{
				minIndex = j;
			}//内层循环，如果数组中元素小于最小值，则把他命名为最小值
		}//内层循环结束后，我们已经找到了所有元素中的最小值
		swap(nums[i], nums[minIndex]);//在i=0时，将零号位置元素与最小值交换
	}//外层循环是一遍遍挑出最大的数，直到没有可挑的为止
}
int main()
{
	clock_t t; // 用来记录时间的变量
	int repeat, repeat_times = 100; // 用于重复测量的变量
	// 为了提高结果的精度，通常需要重复运行取均值
	int k;
	cin >> k;
	t = clock(); // 在你要测量的代码开始运行之前，记录下 clock() 的返回
	//值，作为代码运行的开始时间
	int* nums = new int[k];
	for (repeat = 0; repeat < repeat_times; ++repeat)
	{

		// 接下来是我们想要测量的代码，请注意排除无需测量的代码，比如文件读
		//写、数据生成等

		Random(nums, k, 1, 100);//生成随机数的通常范围为0~32767，这里通过取模控制取值为0~100
		for (int m = 0; m < k; m++)
		{
			cout << nums[m] << " ";
		}
		selection(nums, k);
		for (int m = 0; m < k; m++)
		{
			cout << nums[m] << " ";
		}
		// 我们实际想要测量的代码到此结束
	}
	t = clock() - t; // 在你要测量的代码运行结束后，用当前时间减去开始时
	//间，得到运行所需要的时间
	cout << "It took " << ((float)t) / CLOCKS_PER_SEC / repeat_times
		<< " seconds to run selectionsort" << endl; // 将 clock_t 转换成秒并
	//除以重复次数
	return 0;
}

测试mergesort

#include 
#include  
#include  // 时间相关的头文件
using namespace std;

void Random(int* a, int n, int l, int r)//生成范围在l~r的随机数 
{
	srand(time(0));  //设置时间种子
	for (int i = 0; i < n; i++) {
		a[i] = rand() % (r - l + 1) + l;//生成区间r~l的随机数 
	}
}

void selection(int* nums, int n)
{
	// selectSort 选择排序
	//遍历数组，找到最大的数，直到所有大数都挑选完
	int minIndex;//定义一个最小值所在的数组下标
	for (int i = 0; i < n - 1; ++i) {
		minIndex = i;
		for (int j = i + 1; j < n; ++j)
		{
			if (nums[j] < nums[minIndex])
			{
				minIndex = j;
			}//内层循环，如果数组中元素小于最小值，则把他命名为最小值
		}//内层循环结束后，我们已经找到了所有元素中的最小值
		swap(nums[i], nums[minIndex]);//在i=0时，将零号位置元素与最小值交换
	}//外层循环是一遍遍挑出最大的数，直到没有可挑的为止
}
int main()
{
	clock_t t; // 用来记录时间的变量
	int repeat, repeat_times = 100; // 用于重复测量的变量
	// 为了提高结果的精度，通常需要重复运行取均值
	int k;
	cin >> k;
	t = clock(); // 在你要测量的代码开始运行之前，记录下 clock() 的返回
	//值，作为代码运行的开始时间
	int* nums = new int[k];
	for (repeat = 0; repeat < repeat_times; ++repeat)
	{

		// 接下来是我们想要测量的代码，请注意排除无需测量的代码，比如文件读
		//写、数据生成等

		Random(nums, k, 1, 100);//生成随机数的通常范围为0~32767，这里通过取模控制取值为0~100
		selection(nums, k);
		// 我们实际想要测量的代码到此结束
	}
	t = clock() - t; // 在你要测量的代码运行结束后，用当前时间减去开始时
	//间，得到运行所需要的时间
	cout << "It took " << ((float)t) / CLOCKS_PER_SEC / repeat_times
		<< " seconds to run selectionsort" << endl; // 将 clock_t 转换成秒并
	//除以重复次数
	return 0;
}

可见，虽然此程序在oj中是partial accepted，但是我写的这个程序并没有错误，反倒是对大数和小数都很友好。

2、使用Python中的绘图函数绘制图像


import matplotlib.pyplot as plt
# 由于数据过少，我们直接采用折线图，并没有采用散点图进行拟合
x = [10, 100, 1000, 10000, 100000]
y1 = [0, 0.00002, 0.00186, 0.05741, 7.00938]
y2 = [0.00002, 0.00017, 0.00222, 0.01708, 1.87327]
# 绘制图形
plt.plot(x, y1, linewidth=1, color="orange", marker="o", label="select_time")
plt.plot(x, y2, linewidth=1, color="blue", marker="o", label="merge_time")
# 设置中文显示方式
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.legend()
plt.xlabel("数据集大小")  # 给x轴起名字
plt.ylabel("运行时间")  # 给y轴起名字
plt.savefig("data.png")
plt.show()

输出图像如下

可知mergesort的排序速度确实是比selection sort的排序速度快的。猜想得证。

欢迎分享，转载请注明来源：内存溢出

原文地址: https://outofmemory.cn/langs/2990914.html

排序算法的时间复杂度与数据量关系探究实验

发表评论

评论列表（0条）