前言
项目需要处理百万级别的数据,第一步便是读取指定路径下的所有 .csv 文件,于是有了这篇博文。
文中会用到 C++11 引入的新特性 std::regex(可参考 cppreference 上的 std::regex 文档)及其相关的正则表达式。
编译环境
系统环境:
patten@patten-hp:~$ sudo lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 16.04.6 LTS
Release: 16.04
Codename: xenial
patten@patten-hp:~$
IDE环境:Visual Studio Code,Version: 1.38.1
目标路径下的文件列表:
patten@patten-hp:~/文档/collide/222222$ ls
drive-data_collision-gps_0.csv drive-data_collision-sensor-stay_0.csv
drive-data_collision-gps_1.csv drive-data_collision-sensor-stay_1.csv
drive-data_collision-gps_2.csv drive-data_collision-sensor-stay_2.csv
drive-data_collision-gps_3.csv drive-data_collision-sensor-stay_3.csv
drive-data_collision-gps_4.csv drive-data_collision-sensor-stay_4.csv
drive-data_collision-gps_5.csv drive-data_collision-sensor-stay_5.csv
drive-data_collision-gps_6.csv drive-data_collision-sensor-stay_6.csv
drive-data_collision-gps_7.csv drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor_7.csv result_check.csv
patten@patten-hp:~/文档/collide/222222$
代码示例
// searchTargetFile.cpp
#include <dirent.h>
#include <sys/stat.h>
#include <iostream>
#include <regex>
#include <string>
#include <vector>
// Pulls every name from namespace std into file scope.
using namespace std;
// Directory scanned by the demo in main(); note that it ends with '/'.
const std::string XJ_TESTDATASRC = "/home/patten/文档/collide/222222/"; // test data directory
// File-name prefix that main() passes to Get_all_files_prefix.
const std::string prefix = "drive-data_collision-sensor-stay_"; // prefix to look for
// File-name suffix that main() passes to Get_all_files_suffix.
const std::string suffix = ".csv"; // suffix to look for
/// @brief Lists the regular files directly inside @p path whose names start with @p prefix.
///
/// The comparison is case-insensitive (std::regex::icase) and @p prefix is treated as
/// literal text: regex metacharacters in it are escaped before the pattern is built.
/// Sub-directories are not descended into. Each hit is echoed to std::cout together
/// with its inode number, directory offset, record length and numeric d_type.
///
/// @param path   Directory to scan; a trailing '/' is optional.
/// @param prefix Literal file-name prefix to look for.
/// @return Full paths (directory + '/' + name) of the matching files, in readdir() order.
///         Empty when the directory cannot be opened (a message is printed in that case).
std::vector<std::string> Get_all_files_prefix(std::string path, std::string prefix) {
  std::vector<std::string> files;

  // Escape ECMAScript syntax characters so that e.g. '.' or '+' in the prefix match
  // themselves. The regex is built before opendir() so that a failure here cannot
  // leak the directory handle.
  const std::string meta_chars = "\\^$.|?*+()[]{}";
  std::string pattern;
  pattern.reserve(prefix.size() * 2 + 2);
  for (const char ch : prefix) {
    if (meta_chars.find(ch) != std::string::npos) {
      pattern += '\\';
    }
    pattern += ch;
  }
  pattern += ".*";
  const std::regex reg_obj(pattern, std::regex::icase);

  DIR *dp = opendir(path.c_str());
  if (dp == nullptr) {
    std::cout << "Can not open " << path << std::endl;
    return files;
  }
  std::cout << "open " << path << " successfully!" << std::endl;

  // Guarantee a separator between the directory and the entry name.
  if (!path.empty() && path.back() != '/') {
    path += '/';
  }

  struct dirent *entry = nullptr;
  while ((entry = readdir(dp)) != nullptr) {
    // Only regular files are of interest. DT_UNKNOWN is kept for now because some
    // file systems never fill in d_type; it is resolved with lstat() below.
    if (entry->d_type != DT_REG && entry->d_type != DT_UNKNOWN) {
      continue;
    }
    if (!std::regex_match(entry->d_name, reg_obj)) {
      continue;
    }

    const std::string all_path = path + entry->d_name;
    if (entry->d_type == DT_UNKNOWN) {
      // lstat (not stat) so that symbolic links stay excluded, as they are when
      // the file system reports DT_LNK.
      struct stat info;
      if (lstat(all_path.c_str(), &info) != 0 || !S_ISREG(info.st_mode)) {
        continue;
      }
    }

    std::cout << entry->d_name << std::endl;
    files.push_back(all_path);
    // d_type is an unsigned char; cast it so the numeric code is printed instead of
    // a raw control character.
    std::cout << entry->d_name << " " << entry->d_ino << " " << entry->d_off << " "
              << entry->d_reclen << " " << static_cast<int>(entry->d_type) << std::endl;
  }
  closedir(dp);
  return files;
}
/// @brief Lists the regular files directly inside @p path whose names end with @p suffix.
///
/// The comparison is case-insensitive (std::regex::icase) and @p suffix is treated as
/// literal text: regex metacharacters in it are escaped, so the '.' of ".csv" only
/// matches a real dot. Sub-directories are not descended into. Each hit is echoed to
/// std::cout together with its inode number, directory offset, record length and
/// numeric d_type.
///
/// @param path   Directory to scan; a trailing '/' is optional.
/// @param suffix Literal file-name suffix to look for (e.g. ".csv").
/// @return Full paths (directory + '/' + name) of the matching files, in readdir() order.
///         Empty when the directory cannot be opened (a message is printed in that case).
std::vector<std::string> Get_all_files_suffix(std::string path, std::string suffix) {
  std::vector<std::string> files;

  // Escape ECMAScript syntax characters so the suffix is matched verbatim. The regex
  // is built before opendir() so that a failure here cannot leak the directory handle.
  const std::string meta_chars = "\\^$.|?*+()[]{}";
  std::string pattern = ".*";
  pattern.reserve(suffix.size() * 2 + 2);
  for (const char ch : suffix) {
    if (meta_chars.find(ch) != std::string::npos) {
      pattern += '\\';
    }
    pattern += ch;
  }
  const std::regex reg_obj(pattern, std::regex::icase);

  DIR *dp = opendir(path.c_str());
  if (dp == nullptr) {
    std::cout << "Can not open " << path << std::endl;
    return files;
  }
  std::cout << "open " << path << " successfully!" << std::endl;

  // Guarantee a separator between the directory and the entry name.
  if (!path.empty() && path.back() != '/') {
    path += '/';
  }

  struct dirent *entry = nullptr;
  while ((entry = readdir(dp)) != nullptr) {
    // Only regular files are of interest. DT_UNKNOWN is kept for now because some
    // file systems never fill in d_type; it is resolved with lstat() below.
    if (entry->d_type != DT_REG && entry->d_type != DT_UNKNOWN) {
      continue;
    }
    if (!std::regex_match(entry->d_name, reg_obj)) {
      continue;
    }

    const std::string all_path = path + entry->d_name;
    if (entry->d_type == DT_UNKNOWN) {
      // lstat (not stat) so that symbolic links stay excluded, as they are when
      // the file system reports DT_LNK.
      struct stat info;
      if (lstat(all_path.c_str(), &info) != 0 || !S_ISREG(info.st_mode)) {
        continue;
      }
    }

    std::cout << entry->d_name << std::endl;
    files.push_back(all_path);
    // d_type is an unsigned char; cast it so the numeric code is printed instead of
    // a raw control character.
    std::cout << entry->d_name << " " << entry->d_ino << " " << entry->d_off << " "
              << entry->d_reclen << " " << static_cast<int>(entry->d_type) << std::endl;
  }
  closedir(dp);
  return files;
}
// Demo driver: scans XJ_TESTDATASRC once for names starting with `prefix` and once
// for names ending with `suffix`. The returned path lists are not used here; the
// lookup functions print every hit themselves.
//
// The banners go through std::cout (from <iostream>, which this file includes)
// rather than printf, whose header <cstdio> is not included by this file.
int main() {
  std::cout << "\n/*************开始查找指定前缀文件*******************/\n";
  Get_all_files_prefix(XJ_TESTDATASRC, prefix);
  std::cout << "\n/*************结束查找指定前缀文件*******************/\n";

  std::cout << "\n/*************开始查找指定后缀文件*******************/\n";
  Get_all_files_suffix(XJ_TESTDATASRC, suffix);
  std::cout << "\n/*************结束查找指定后缀文件*******************/\n";
  return 0;
}
编译运行结果
patten@patten-hp:~/workspace/xjCollide$ g++ searchTargetFile.cpp -std=c++11
patten@patten-hp:~/workspace/xjCollide$ ./a.out
/*************开始查找指定前缀文件*******************/
open /home/patten/文档/collide/222222/ successfully!
drive-data_collision-sensor-stay_3.csv
drive-data_collision-sensor-stay_3.csv 60868958 1496425211632737111 64
drive-data_collision-sensor-stay_0.csv
drive-data_collision-sensor-stay_0.csv 60868955 3389925680683477497 64
drive-data_collision-sensor-stay_4.csv
drive-data_collision-sensor-stay_4.csv 60868959 3914622289505186073 64
drive-data_collision-sensor-stay_5.csv
drive-data_collision-sensor-stay_5.csv 60868960 6337759545780258730 64
drive-data_collision-sensor-stay_2.csv
drive-data_collision-sensor-stay_2.csv 60868957 6847131988194754799 64
drive-data_collision-sensor-stay_6.csv
drive-data_collision-sensor-stay_6.csv 60868961 7096464187626317677 64
drive-data_collision-sensor-stay_1.csv
drive-data_collision-sensor-stay_1.csv 60868956 7711111459540839378 64
drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor-stay_7.csv 60868962 8148512087471513061 64
/*************结束查找指定前缀文件*******************/
/*************开始查找指定后缀文件*******************/
open /home/patten/文档/collide/222222/ successfully!
drive-data_collision-sensor-stay_3.csv
drive-data_collision-sensor-stay_3.csv 60868958 1496425211632737111 64
drive-data_collision-gps_3.csv
drive-data_collision-gps_3.csv 60868950 2617472239127085004 56
drive-data_collision-gps_2.csv
drive-data_collision-gps_2.csv 60868949 2631674452140846130 56
drive-data_collision-gps_7.csv
drive-data_collision-gps_7.csv 60868954 2725119114177981729 56
drive-data_collision-sensor_7.csv
drive-data_collision-sensor_7.csv 60868963 2961243752813142490 56
drive-data_collision-sensor-stay_0.csv
drive-data_collision-sensor-stay_0.csv 60868955 3389925680683477497 64
drive-data_collision-sensor-stay_4.csv
drive-data_collision-sensor-stay_4.csv 60868959 3914622289505186073 64
drive-data_collision-gps_0.csv
drive-data_collision-gps_0.csv 60868947 4273490190034659860 56
drive-data_collision-sensor-stay_5.csv
drive-data_collision-sensor-stay_5.csv 60868960 6337759545780258730 64
drive-data_collision-gps_6.csv
drive-data_collision-gps_6.csv 60868953 6382352328763770855 56
drive-data_collision-sensor-stay_2.csv
drive-data_collision-sensor-stay_2.csv 60868957 6847131988194754799 64
drive-data_collision-sensor-stay_6.csv
drive-data_collision-sensor-stay_6.csv 60868961 7096464187626317677 64
drive-data_collision-gps_1.csv
drive-data_collision-gps_1.csv 60868948 7163652831385258945 56
result_check.csv
result_check.csv 60868551 7655545783049624870 40
drive-data_collision-sensor-stay_1.csv
drive-data_collision-sensor-stay_1.csv 60868956 7711111459540839378 64
drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor-stay_7.csv 60868962 8148512087471513061 64
drive-data_collision-gps_5.csv
drive-data_collision-gps_5.csv 60868952 8963525734837085419 56
drive-data_collision-gps_4.csv
drive-data_collision-gps_4.csv 60868951 9223372036854775807 56
/*************结束查找指定后缀文件*******************/
patten@patten-hp:~/workspace/xjCollide$