vendredi 29 mars 2019

Why does the quick sort algorithm duration increase when the range of integers in an array is narrow?

I am trying to measure the duration for both Merge Sort and Quick Sort functions using std::chrono time calculations and using randomly generated arrays of integers within some range [A, B], the sizes of the arrays vary from 5000 to 100,000 integers.

The goal of my code is to prove that when the method of picking the (pivot) in quick sort is improved, the quick sort function ends up taking less time to process the array than merge sort, the way I pick the pivot is using the random index method to minimize the probability of having a complexity of (n^2), However in some cases which I will describe below, the quick sort ends up taking more time than merge sort and I would like to know why this occurs.

case 1: The range of the numbers in the array is small which increases the probability of having duplicate numbers in the array.

case 2: When I use a local IDE like clion, the quick sort function takes a lot more time than merge sort, however an online compiler like IDEONE.com gives similar results in both sorting algorithms (even when the range of the generated integers is small)

here are the results I got in the mentioned cases(the first row of numbers is merge sort results, the second row is quick sort results):

1-clion results narrow range of numbers (-100, 600) https://i.ibb.co/fNJ9yt1/CLION-GRAPH.png

2-clion results with a wide range of numbers (INT_MIN, INT_MAX) https://i.ibb.co/PwGDq8Y/CLION-RESULTS-WIDERANGE.png

3-IDEONE results with a narrow range of numbers (-100, 600) https://i.ibb.co/SRW20cM/IDEONE-GRAPH.png

4- IDEONE results with a wide range of numbers (INT_MIN, INT_MAX) https://i.ibb.co/rHyDf5f/IDEONE-RESULTS-WIDERANGE.jpg

#include <bits/stdc++.h>
#include <chrono>
#include <random>

using namespace std;

mt19937 gen(chrono::steady_clock::now().time_since_epoch().count());
int* generateArray(int size)
{
    int* arr = new int[size];
    uniform_int_distribution<> distribution(INT_MIN, INT_MAX);
    for (int i=0; i < size; ++i)
    {
        arr[i] = distribution(gen);
    }
    return arr;
}
void merge(int* leftArr, int nL, int* rightArr, int nR, int* mainArr)
{
    int i=0, j=0, k=0;
    while (i < nL && j < nR)
    {
        if (leftArr[i] < rightArr[j]) { mainArr[k++] = leftArr[i++]; }
        else { mainArr[k++] = rightArr[j++]; }
    }
    while (i < nL){ mainArr[k++] = leftArr[i++]; }
    while (j < nR){ mainArr[k++] = rightArr[j++]; }
}
void mergeSort (int* mainArray, int arrayLength)
{
    if (arrayLength < 2) { return; }
    int mid = arrayLength/2;
    int* leftArray = new int[mid];
    int* rightArray = new int[arrayLength - mid];
    for (int i=0; i<mid; ++i) {leftArray[i] = mainArray[i];}
    for (int i = mid; i<arrayLength; ++i) {rightArray[i - mid] = mainArray[i];}
    mergeSort(leftArray, mid);
    mergeSort(rightArray, arrayLength-mid);
    merge(leftArray, mid, rightArray, arrayLength-mid, mainArray);
    delete[] leftArray;
    delete[] rightArray;
}


int partition (int* arr, int left, int right)
{
    uniform_int_distribution<> distribution(left, right);
    int idx = distribution(gen);
    swap(arr[right], arr[idx]);
    int pivot = arr[right];
    int partitionIndex = left;
    for (int i = left; i < right; ++i)
    {
        if (arr[i] <= pivot)
        {
            swap(arr[i], arr[partitionIndex]);
            partitionIndex++;
        }
    }
    swap(arr[right], arr[partitionIndex]);
    return partitionIndex;
}
void quickSort (int* arr, int left, int right)
{
    if(left < right)
    {
        int partitionIndex = partition(arr, left, right);
        quickSort(arr, left, partitionIndex-1);
        quickSort(arr, partitionIndex+1, right);
    }
}

int main()
{
    vector <long long> mergeDuration;
    vector <long long> quickDuration;
    for (int i = 5000; i<= 100000; i += 5000)
    {
        int* arr = generateArray(i);
        auto startTime = chrono::high_resolution_clock::now();
        quickSort(arr, 0, i - 1);
        auto endTime = chrono::high_resolution_clock::now();
        long long duration = chrono::duration_cast<chrono::milliseconds>(endTime - startTime).count();
        quickDuration.push_back(duration);
        delete[] arr;
    }
    for (int i = 5000; i <= 100000; i += 5000 )
    {
        int* arr = generateArray(i);
        auto startTime = chrono::high_resolution_clock::now();
        mergeSort(arr, i);
        auto endTime = chrono::high_resolution_clock::now();
        long long duration = chrono::duration_cast<chrono::milliseconds>(endTime - startTime).count();
        mergeDuration.push_back(duration);
        delete[] arr;
    }
    for (int i = 0; i<mergeDuration.size(); ++i)
    {
        cout << mergeDuration[i] << " ";
    }
    cout  << endl;
    for (int i = 0; i<quickDuration.size(); ++i)
    {
        cout << quickDuration[i] << " ";
    }
}




Aucun commentaire:

Enregistrer un commentaire