前言

项目需求中需要处理百万级别的数据,第一步便是读取指定路径下的所有 .csv 文件,于是有了这篇博文。文中会用到 C++11 引入的 std::regex(定义于头文件 <regex>)以及相关的正则表达式。

编译环境

系统环境:

patten@patten-hp:~$ sudo lsb_release -a
No LSB modules are available.
Distributor ID:	Ubuntu
Description:	Ubuntu 16.04.6 LTS
Release:	16.04
Codename:	xenial
patten@patten-hp:~$

IDE环境:Visual Studio Code,Version: 1.38.1

目标路径下的文件列表:

patten@patten-hp:~/文档/collide/222222$ ls
drive-data_collision-gps_0.csv     drive-data_collision-sensor-stay_0.csv
drive-data_collision-gps_1.csv     drive-data_collision-sensor-stay_1.csv
drive-data_collision-gps_2.csv     drive-data_collision-sensor-stay_2.csv
drive-data_collision-gps_3.csv     drive-data_collision-sensor-stay_3.csv
drive-data_collision-gps_4.csv     drive-data_collision-sensor-stay_4.csv
drive-data_collision-gps_5.csv     drive-data_collision-sensor-stay_5.csv
drive-data_collision-gps_6.csv     drive-data_collision-sensor-stay_6.csv
drive-data_collision-gps_7.csv     drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor_7.csv  result_check.csv
patten@patten-hp:~/文档/collide/222222$

代码示例

// searchTargetFile.cpp

#include <dirent.h>
#include <sys/stat.h>

#include <iostream>
#include <regex>
#include <string>
#include <vector>

using namespace std;

const std::string XJ_TESTDATASRC{"/home/patten/文档/collide/222222/"};  // directory holding the test files
const std::string prefix{"drive-data_collision-sensor-stay_"};          // file-name prefix to search for
const std::string suffix{".csv"};                                       // file-name suffix to search for

// Collects the regular files located directly inside `path` whose names start
// with `prefix` (compared case-insensitively, as a literal string).
//
// path:   directory to scan; a trailing '/' is optional.
// prefix: literal file-name prefix; regex metacharacters in it have no special meaning.
// Returns the matching entries as `path` + '/' + file name. The vector is empty
// when the directory cannot be opened or nothing matches. Progress and the raw
// dirent fields of every match are written to std::cout.
std::vector<std::string> Get_all_files_prefix(std::string path, std::string prefix) {
  std::vector<std::string> files;

  // Escape regex metacharacters so the prefix is matched literally. The regex is
  // built before opendir() so that a failure here cannot leak the DIR handle.
  static const std::regex kMetaChars(R"([.^$|()\[\]{}*+?\\])");
  const std::string literal_prefix = std::regex_replace(prefix, kMetaChars, R"(\$&)");
  const std::regex reg_obj(literal_prefix + ".*", std::regex::icase);

  DIR *dp = opendir(path.c_str());
  if (dp == nullptr) {
    std::cout << "Can not open " << path << std::endl;
    return files;
  }
  std::cout << "open " << path << " successfully!" << std::endl;

  // Join directory and file name with exactly one separator.
  if (!path.empty() && path.back() != '/') {
    path += '/';
  }

  while (struct dirent *dirp = readdir(dp)) {
    if (!std::regex_match(dirp->d_name, reg_obj)) {
      continue;
    }

    const std::string all_path = path + dirp->d_name;

    // Only regular files are wanted. Some filesystems do not fill in d_type and
    // report DT_UNKNOWN; ask lstat() in that case instead of dropping the entry.
    bool is_regular = (dirp->d_type == DT_REG);
    if (dirp->d_type == DT_UNKNOWN) {
      struct stat st;
      is_regular = (lstat(all_path.c_str(), &st) == 0) && S_ISREG(st.st_mode);
    }
    if (!is_regular) {
      continue;
    }

    std::cout << dirp->d_name << std::endl;
    files.push_back(all_path);
    // d_type is an unsigned char; cast it so it is printed as a number rather
    // than as a control character.
    std::cout << dirp->d_name << " " << dirp->d_ino << " " << dirp->d_off << " " << dirp->d_reclen
              << " " << static_cast<int>(dirp->d_type) << std::endl;
  }
  closedir(dp);
  return files;
}

// Collects the regular files located directly inside `path` whose names end
// with `suffix` (compared case-insensitively, as a literal string).
//
// path:   directory to scan; a trailing '/' is optional.
// suffix: literal file-name suffix such as ".csv"; regex metacharacters in it
//         (notably '.') have no special meaning.
// Returns the matching entries as `path` + '/' + file name. The vector is empty
// when the directory cannot be opened or nothing matches. Progress and the raw
// dirent fields of every match are written to std::cout.
std::vector<std::string> Get_all_files_suffix(std::string path, std::string suffix) {
  std::vector<std::string> files;

  // Escape regex metacharacters so the suffix is matched literally; without this
  // ".csv" would also accept names like "datacsv". The regex is built before
  // opendir() so that a failure here cannot leak the DIR handle.
  static const std::regex kMetaChars(R"([.^$|()\[\]{}*+?\\])");
  const std::string literal_suffix = std::regex_replace(suffix, kMetaChars, R"(\$&)");
  const std::regex reg_obj(".*" + literal_suffix, std::regex::icase);

  DIR *dp = opendir(path.c_str());
  if (dp == nullptr) {
    std::cout << "Can not open " << path << std::endl;
    return files;
  }
  std::cout << "open " << path << " successfully!" << std::endl;

  // Join directory and file name with exactly one separator.
  if (!path.empty() && path.back() != '/') {
    path += '/';
  }

  while (struct dirent *dirp = readdir(dp)) {
    if (!std::regex_match(dirp->d_name, reg_obj)) {
      continue;
    }

    const std::string all_path = path + dirp->d_name;

    // Only regular files are wanted. Some filesystems do not fill in d_type and
    // report DT_UNKNOWN; ask lstat() in that case instead of dropping the entry.
    bool is_regular = (dirp->d_type == DT_REG);
    if (dirp->d_type == DT_UNKNOWN) {
      struct stat st;
      is_regular = (lstat(all_path.c_str(), &st) == 0) && S_ISREG(st.st_mode);
    }
    if (!is_regular) {
      continue;
    }

    std::cout << dirp->d_name << std::endl;
    files.push_back(all_path);
    // d_type is an unsigned char; cast it so it is printed as a number rather
    // than as a control character.
    std::cout << dirp->d_name << " " << dirp->d_ino << " " << dirp->d_off << " " << dirp->d_reclen
              << " " << static_cast<int>(dirp->d_type) << std::endl;
  }
  closedir(dp);
  return files;
}

// Demo driver: runs the prefix search and then the suffix search over
// XJ_TESTDATASRC, framing each run with a banner. The returned file lists are
// discarded; the searches print their matches themselves.
int main() {
  // Banners go through std::cout (already included via <iostream>) instead of
  // printf, which was used without including <cstdio>.
  std::cout << "\n/*************开始查找指定前缀文件*******************/\n";
  Get_all_files_prefix(XJ_TESTDATASRC, prefix);
  std::cout << "\n/*************结束查找指定前缀文件*******************/\n";

  std::cout << "\n/*************开始查找指定后缀文件*******************/\n";
  Get_all_files_suffix(XJ_TESTDATASRC, suffix);
  std::cout << "\n/*************结束查找指定后缀文件*******************/\n";

  return 0;
}

编译运行结果

patten@patten-hp:~/workspace/xjCollide$ g++ searchTargetFile.cpp -std=c++11
patten@patten-hp:~/workspace/xjCollide$ ./a.out 

/*************开始查找指定前缀文件*******************/
open /home/patten/文档/collide/222222/ successfully!
drive-data_collision-sensor-stay_3.csv
drive-data_collision-sensor-stay_3.csv 60868958 1496425211632737111 64 
drive-data_collision-sensor-stay_0.csv
drive-data_collision-sensor-stay_0.csv 60868955 3389925680683477497 64 
drive-data_collision-sensor-stay_4.csv
drive-data_collision-sensor-stay_4.csv 60868959 3914622289505186073 64 
drive-data_collision-sensor-stay_5.csv
drive-data_collision-sensor-stay_5.csv 60868960 6337759545780258730 64 
drive-data_collision-sensor-stay_2.csv
drive-data_collision-sensor-stay_2.csv 60868957 6847131988194754799 64 
drive-data_collision-sensor-stay_6.csv
drive-data_collision-sensor-stay_6.csv 60868961 7096464187626317677 64 
drive-data_collision-sensor-stay_1.csv
drive-data_collision-sensor-stay_1.csv 60868956 7711111459540839378 64 
drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor-stay_7.csv 60868962 8148512087471513061 64 

/*************结束查找指定前缀文件*******************/

/*************开始查找指定后缀文件*******************/
open /home/patten/文档/collide/222222/ successfully!
drive-data_collision-sensor-stay_3.csv
drive-data_collision-sensor-stay_3.csv 60868958 1496425211632737111 64 
drive-data_collision-gps_3.csv
drive-data_collision-gps_3.csv 60868950 2617472239127085004 56 
drive-data_collision-gps_2.csv
drive-data_collision-gps_2.csv 60868949 2631674452140846130 56 
drive-data_collision-gps_7.csv
drive-data_collision-gps_7.csv 60868954 2725119114177981729 56 
drive-data_collision-sensor_7.csv
drive-data_collision-sensor_7.csv 60868963 2961243752813142490 56 
drive-data_collision-sensor-stay_0.csv
drive-data_collision-sensor-stay_0.csv 60868955 3389925680683477497 64 
drive-data_collision-sensor-stay_4.csv
drive-data_collision-sensor-stay_4.csv 60868959 3914622289505186073 64 
drive-data_collision-gps_0.csv
drive-data_collision-gps_0.csv 60868947 4273490190034659860 56 
drive-data_collision-sensor-stay_5.csv
drive-data_collision-sensor-stay_5.csv 60868960 6337759545780258730 64 
drive-data_collision-gps_6.csv
drive-data_collision-gps_6.csv 60868953 6382352328763770855 56 
drive-data_collision-sensor-stay_2.csv
drive-data_collision-sensor-stay_2.csv 60868957 6847131988194754799 64 
drive-data_collision-sensor-stay_6.csv
drive-data_collision-sensor-stay_6.csv 60868961 7096464187626317677 64 
drive-data_collision-gps_1.csv
drive-data_collision-gps_1.csv 60868948 7163652831385258945 56 
result_check.csv
result_check.csv 60868551 7655545783049624870 40 
drive-data_collision-sensor-stay_1.csv
drive-data_collision-sensor-stay_1.csv 60868956 7711111459540839378 64 
drive-data_collision-sensor-stay_7.csv
drive-data_collision-sensor-stay_7.csv 60868962 8148512087471513061 64 
drive-data_collision-gps_5.csv
drive-data_collision-gps_5.csv 60868952 8963525734837085419 56 
drive-data_collision-gps_4.csv
drive-data_collision-gps_4.csv 60868951 9223372036854775807 56 

/*************结束查找指定后缀文件*******************/
patten@patten-hp:~/workspace/xjCollide$