In the previous article, we have discussed about How to iterate a map in reverse order – C++. Let us learn std::set example and tutorial in C++ Program.
In this article we will see the process of using external sorting criteria.
Before going to this topic we must first know what external sorting is and what is a comparator.
External sorting:
It is a category of sorting algorithm that can sort huge amounts of data.
This type of sorting is applied to a data set that acquires large memory which cannot be held in main memory (RAM) and is stored in secondary memory(hard disk). we apply external sorting for a huge data set that cannot be processed in a single go. Data is divided into small chunks these chunks are sorted and then stored in data files.
#include <iostream>
#include <fstream>
#include<sstream>
#include <queue>
#include<algorithm>
using namespace std;
class Compare
{
public:
bool operator() (pair<int, int> pair1, pair<int, int> pair2)
{
return pair1.first > pair2.first;
}
};
string ToString(int val) {
stringstream stream;
stream << val;
return stream.str();
}
string mergeFiles(int counter) {
string fileA, fileB;
std::priority_queue<pair<int, int>, std::vector<pair<int, int> >, Compare> minHeap;
ifstream* handles = new ifstream[counter];
for (int i = 1; i <= counter; i++) {
string sortedInputFileName = "output" + ToString(i) + ".txt";
handles[i - 1].open(sortedInputFileName.c_str());
int firstValue;
handles[i - 1] >> firstValue;
minHeap.push(pair<int, int>(firstValue, i - 1));
}
string outputFileName = "output.txt";
ofstream outputFile(outputFileName.c_str());
while (minHeap.size() > 0) {
pair<int, int> minPair = minHeap.top();
minHeap.pop();
outputFile << minPair.first << '\n';
int nextValue;
flush(outputFile);
if (handles[minPair.second] >> nextValue) {
minHeap.push(pair <int, int>(nextValue, minPair.second));
}
}
for (int i = 1; i <= counter; i++) {
handles[i - 1].close();
}
outputFile.close();
delete[] handles;
return outputFileName;
}
void sortAndWrite(int *values, int size, int numberOfChunks) {
sort(values, values + size);
string outputFileName = "output" + ToString(numberOfChunks) + ".txt";
ofstream outputFile(outputFileName.c_str());
for (int i = 0; i < size; i++) {
outputFile << values[i] << '\n';
}
outputFile.close();
}
int main() {
int numberOfChunks = 1;
int maxSizeofMemory = 32;
int chunkSize = maxSizeofMemory / sizeof(int);
int* inputValues = new int[chunkSize];
int readValue = 0;
int currentCount = 0;
bool unprocessedData = true;
ifstream inputFile("input.txt");
while (inputFile >> readValue) {
unprocessedData = true;
inputValues[currentCount++] = readValue;
if (currentCount == chunkSize) {
sortAndWrite(inputValues, currentCount, numberOfChunks);
numberOfChunks++;
currentCount = 0;
unprocessedData = false;
}
}
if (unprocessedData) {
sortAndWrite(inputValues, currentCount, numberOfChunks);
}
else {
numberOfChunks--;
}
inputFile.close();
delete[] inputValues;
if (numberOfChunks != 0) {
std::priority_queue<pair<int, int>, std::vector<pair<int, int> >, Compare> minHeap;
cout << "output is in file : " << mergeFiles(numberOfChunks);
}
return 0;
}
Output : output is in file : output.txt
Comparator:
These classes are used to compare the objects of user-defined classes. To develop a generic function use template and to make the function more generic we use containers so that comparisons between data can be made.
Example: the most common example is Liner Search,
#include<iostream>
using namespace std;
int main()
{
int a[20],n,x,i,flag=0;
cout<<"How many elements?";
cin>>n;
cout<<"\nEnter elements of the array\n";
for(i=0;i<n;++i)
cin>>a[i];
cout<<"\nEnter element to search:";
cin>>x;
for(i=0;i<n;++i)
{
if(a[i]==x)
{
flag=1;
break;
}
}
if(flag)
cout<<"\nElement is found at position "<<i+1;
else
cout<<"\nElement not found";
return 0;
}
Output: How many elements?4 Enter elements of the array 5 11 12 4 Enter element to search:11 Element is found at position 2
Use of external sorting criteria i.e. comparator :
Below code is the implementation of this.
#include <iostream>
#include <set>
using namespace std;
int main()
{
set<int> set1;
set<int>::iterator iterator_1;
set<int>::iterator iterator_2;
set1.insert(2);
set1.insert(3);
set1.insert(4);
set1.insert(5);
set<int, greater<int>> set2(set1.begin(), set1.end());
cout << "Elements after insertion of set 1 = ";
for (iterator_1 = set1.begin(); iterator_1 != set1.end(); ++iterator_1)
cout << *iterator_1 << " ";
cout << "\nElements after insertion of set 2 = ";
for (iterator_2 = set2.begin(); iterator_2 != set2.end(); ++iterator_2)
cout << *iterator_2 << " ";
}
Output : Elements after insertion of set 1 = 2 3 4 5 Elements after insertion of set 2 = 5 4 3 2