WireCellToolkit
Wire Cell Simulation, Signal Process and Reconstruction Toolki for Liquid Argon Detectors
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cnpy.h
Go to the documentation of this file.
1 //Copyright (C) 2011 Carl Rogers
2 //Released under MIT License
3 //license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
4 // "vendored" from https://github.com/rogersce/cnpy/commit/4e8810b1a8637695171ed346ce68f6984e585ef4
5 
6 #ifndef LIBCNPY_H_
7 #define LIBCNPY_H_
8 
9 #include<string>
10 #include<stdexcept>
11 #include<sstream>
12 #include<vector>
13 #include<cstdio>
14 #include<typeinfo>
15 #include<iostream>
16 #include<zlib.h>
17 #include<map>
18 #include<memory>
19 #include<stdint.h>
20 #include<numeric>
21 
22 namespace cnpy {
23 
24  struct NpyArray {
25  NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order) :
26  shape(_shape), word_size(_word_size), fortran_order(_fortran_order)
27  {
28  num_vals = 1;
29  for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
30  data_holder = std::shared_ptr<std::vector<char>>(
31  new std::vector<char>(num_vals * word_size));
32  }
33 
35 
36  template<typename T>
37  T* data() {
38  return reinterpret_cast<T*>(&(*data_holder)[0]);
39  }
40 
41  template<typename T>
42  const T* data() const {
43  return reinterpret_cast<T*>(&(*data_holder)[0]);
44  }
45 
46  template<typename T>
47  std::vector<T> as_vec() const {
48  const T* p = data<T>();
49  return std::vector<T>(p, p+num_vals);
50  }
51 
52  size_t num_bytes() const {
53  return data_holder->size();
54  }
55 
56  std::shared_ptr<std::vector<char>> data_holder;
57  std::vector<size_t> shape;
58  size_t word_size;
60  size_t num_vals;
61  };
62 
63  using npz_t = std::map<std::string, NpyArray>;
64 
65  char BigEndianTest();
66  char map_type(const std::type_info& t);
67  template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape);
68  void parse_npy_header(FILE* fp,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
69  void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector<size_t>& shape, bool& fortran_order);
70  void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset);
71  npz_t npz_load(std::string fname);
72  NpyArray npz_load(std::string fname, std::string varname);
73  NpyArray npy_load(std::string fname);
74 
75  template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
76  //write in little endian
77  for(size_t byte = 0; byte < sizeof(T); byte++) {
78  char val = *((char*)&rhs+byte);
79  lhs.push_back(val);
80  }
81  return lhs;
82  }
83 
84  template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
85  template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
86 
87 
88  template<typename T> void npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w") {
89  FILE* fp = NULL;
90  std::vector<size_t> true_data_shape; //if appending, the shape of existing + new data
91 
92  if(mode == "a") fp = fopen(fname.c_str(),"r+b");
93 
94  if(fp) {
95  //file exists. we need to append to it. read the header, modify the array size
96  size_t word_size;
97  bool fortran_order;
98  parse_npy_header(fp,word_size,true_data_shape,fortran_order);
99  if (fortran_order) {
100  throw std::runtime_error("npy_save: unexpected fortran order");
101  }
102 
103  if(word_size != sizeof(T)) {
104  std::cerr<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
105  if( word_size != sizeof(T) ) {
106  throw std::runtime_error("npy_save: illegal word size");
107  }
108  }
109  if(true_data_shape.size() != shape.size()) {
110  std::cerr<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
111  throw std::runtime_error("npy_save: misdimensioned data");
112  // original assert() had a bug, I think.
113  }
114 
115  for(size_t i = 1; i < shape.size(); i++) {
116  if(shape[i] != true_data_shape[i]) {
117  std::cerr<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
118  throw std::runtime_error("npy_save: misdimensioned data");
119  }
120  }
121  true_data_shape[0] += shape[0];
122  }
123  else {
124  fp = fopen(fname.c_str(),"wb");
125  true_data_shape = shape;
126  }
127 
128  std::vector<char> header = create_npy_header<T>(true_data_shape);
129  size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
130 
131  fseek(fp,0,SEEK_SET);
132  fwrite(&header[0],sizeof(char),header.size(),fp);
133  fseek(fp,0,SEEK_END);
134  fwrite(data,sizeof(T),nels,fp);
135  fclose(fp);
136  }
137 
138  template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape, std::string mode = "w")
139  {
140  //first, append a .npy to the fname
141  fname += ".npy";
142 
143  //now, on with the show
144  FILE* fp = NULL;
145  uint16_t nrecs = 0;
146  size_t global_header_offset = 0;
147  std::vector<char> global_header;
148 
149  if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
150 
151  if(fp) {
152  //zip file exists. we need to add a new npy file to it.
153  //first read the footer. this gives us the offset and size of the global header
154  //then read and store the global header.
155  //below, we will write the the new data at the start of the global header then append the global header and footer below it
156  size_t global_header_size;
157  parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
158  fseek(fp,global_header_offset,SEEK_SET);
159  global_header.resize(global_header_size);
160  size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
161  if(res != global_header_size){
162  throw std::runtime_error("npz_save: header read error while adding to existing zip");
163  }
164  fseek(fp,global_header_offset,SEEK_SET);
165  }
166  else {
167  fp = fopen(zipname.c_str(),"wb");
168  }
169 
170  std::vector<char> npy_header = create_npy_header<T>(shape);
171 
172  size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
173  size_t nbytes = nels*sizeof(T) + npy_header.size();
174 
175  //get the CRC of the data to be added
176  uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
177  crc = crc32(crc,(uint8_t*)data,nels*sizeof(T));
178 
179  //build the local header
180  std::vector<char> local_header;
181  local_header += "PK"; //first part of sig
182  local_header += (uint16_t) 0x0403; //second part of sig
183  local_header += (uint16_t) 20; //min version to extract
184  local_header += (uint16_t) 0; //general purpose bit flag
185  local_header += (uint16_t) 0; //compression method
186  local_header += (uint16_t) 0; //file last mod time
187  local_header += (uint16_t) 0; //file last mod date
188  local_header += (uint32_t) crc; //crc
189  local_header += (uint32_t) nbytes; //compressed size
190  local_header += (uint32_t) nbytes; //uncompressed size
191  local_header += (uint16_t) fname.size(); //fname length
192  local_header += (uint16_t) 0; //extra field length
193  local_header += fname;
194 
195  //build global header
196  global_header += "PK"; //first part of sig
197  global_header += (uint16_t) 0x0201; //second part of sig
198  global_header += (uint16_t) 20; //version made by
199  global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
200  global_header += (uint16_t) 0; //file comment length
201  global_header += (uint16_t) 0; //disk number where file starts
202  global_header += (uint16_t) 0; //internal file attributes
203  global_header += (uint32_t) 0; //external file attributes
204  global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
205  global_header += fname;
206 
207  //build footer
208  std::vector<char> footer;
209  footer += "PK"; //first part of sig
210  footer += (uint16_t) 0x0605; //second part of sig
211  footer += (uint16_t) 0; //number of this disk
212  footer += (uint16_t) 0; //disk where footer starts
213  footer += (uint16_t) (nrecs+1); //number of records on this disk
214  footer += (uint16_t) (nrecs+1); //total number of records
215  footer += (uint32_t) global_header.size(); //nbytes of global headers
216  footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
217  footer += (uint16_t) 0; //zip file comment length
218 
219  //write everything
220  fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
221  fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
222  fwrite(data,sizeof(T),nels,fp);
223  fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
224  fwrite(&footer[0],sizeof(char),footer.size(),fp);
225  fclose(fp);
226  }
227 
228  template<typename T> void npy_save(std::string fname, const std::vector<T> data, std::string mode = "w") {
229  std::vector<size_t> shape;
230  shape.push_back(data.size());
231  npy_save(fname, &data[0], shape, mode);
232  }
233 
234  template<typename T> void npz_save(std::string zipname, std::string fname, const std::vector<T> data, std::string mode = "w") {
235  std::vector<size_t> shape;
236  shape.push_back(data.size());
237  npz_save(zipname, fname, &data[0], shape, mode);
238  }
239 
240  template<typename T> std::vector<char> create_npy_header(const std::vector<size_t>& shape) {
241 
242  std::vector<char> dict;
243  dict += "{'descr': '";
244  dict += BigEndianTest();
245  dict += map_type(typeid(T));
246  dict += std::to_string(sizeof(T));
247  dict += "', 'fortran_order': False, 'shape': (";
248  dict += std::to_string(shape[0]);
249  for(size_t i = 1;i < shape.size();i++) {
250  dict += ", ";
251  dict += std::to_string(shape[i]);
252  }
253  if(shape.size() == 1) dict += ",";
254  dict += "), }";
255  //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
256  int remainder = 16 - (10 + dict.size()) % 16;
257  dict.insert(dict.end(),remainder,' ');
258  dict.back() = '\n';
259 
260  std::vector<char> header;
261  header += (char) 0x93;
262  header += "NUMPY";
263  header += (char) 0x01; //major version of numpy format
264  header += (char) 0x00; //minor version of numpy format
265  header += (uint16_t) dict.size();
266  header.insert(header.end(),dict.begin(),dict.end());
267 
268  return header;
269  }
270 
271 
272 }
273 
274 #endif
size_t word_size
Definition: cnpy.h:58
void npz_save(std::string zipname, std::string fname, const T *data, const std::vector< size_t > &shape, std::string mode="w")
Definition: cnpy.h:138
char map_type(const std::type_info &t)
Definition: cnpy.cxx:21
void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
Definition: cnpy.cxx:162
npz_t npz_load(std::string fname)
Definition: cnpy.cxx:246
NpyArray npy_load(std::string fname)
Definition: cnpy.cxx:343
void npy_save(std::string fname, const T *data, const std::vector< size_t > shape, std::string mode="w")
Definition: cnpy.h:88
NpyArray()
Definition: cnpy.h:34
std::map< std::string, NpyArray > npz_t
Definition: cnpy.h:63
char BigEndianTest()
Definition: cnpy.cxx:16
std::vector< char > create_npy_header(const std::vector< size_t > &shape)
Definition: cnpy.h:240
void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order)
Definition: cnpy.cxx:107
std::shared_ptr< std::vector< char > > data_holder
Definition: cnpy.h:56
std::vector< size_t > shape
Definition: cnpy.h:57
bool fortran_order
Definition: cnpy.h:59
T * data()
Definition: cnpy.h:37
Definition: cnpy.h:22
size_t num_vals
Definition: cnpy.h:60
std::string to_string(const T &value)
Definition: format.h:3209
std::vector< T > as_vec() const
Definition: cnpy.h:47
std::vector< char > & operator+=(std::vector< char > &lhs, const T rhs)
Definition: cnpy.h:75
const T * data() const
Definition: cnpy.h:42
NpyArray(const std::vector< size_t > &_shape, size_t _word_size, bool _fortran_order)
Definition: cnpy.h:25
size_t num_bytes() const
Definition: cnpy.h:52