MLIR  18.0.0git
File.h
Go to the documentation of this file.
1 //===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements reading sparse tensors from files in one of the
10 // following external formats:
11 //
12 // (1) Matrix Market Exchange (MME): *.mtx
13 // https://math.nist.gov/MatrixMarket/formats.html
14 //
15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
16 // http://frostt.io/tensors/file-formats.html
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
21 #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
22 
25 
26 #include <fstream>
27 
28 namespace mlir {
29 namespace sparse_tensor {
30 
namespace detail {

/// Type trait that is true exactly for `std::complex<T>` instantiations.
/// Primary template: any type that is not `std::complex` yields false.
template <typename T>
struct is_complex final : std::false_type {};

/// Partial specialization matching `std::complex<T>`.
template <typename T>
struct is_complex<std::complex<T>> final : std::true_type {};

/// Produces one element-value of a non-complex type `V`.
///
/// When `IsPattern` is true nothing is parsed, `*linePtr` is left untouched,
/// and the value 1 is returned. Otherwise a single number is parsed with
/// `strtod` starting at `*linePtr`, and `*linePtr` is advanced past the
/// parsed text.
template <typename V, bool IsPattern>
inline std::enable_if_t<!is_complex<V>::value, V> readValue(char **linePtr) {
  if constexpr (!IsPattern) {
    // The external formats store numbers as doubles; the conversion to the
    // tensor's element type happens implicitly on return.
    char *cursor = *linePtr;
    const double parsed = strtod(cursor, &cursor);
    *linePtr = cursor;
    return parsed;
  } else {
    // Pattern tensors carry no values; 1 is the arbitrary stand-in.
    return 1.0;
  }
}

/// Produces one element-value of a complex type `V`.
///
/// When `IsPattern` is true nothing is parsed and `V(1.0, 1.0)` is returned.
/// Otherwise two consecutive numbers (real part, then imaginary part) are
/// parsed with `strtod` starting at `*linePtr`, and `*linePtr` is advanced
/// past both.
template <typename V, bool IsPattern>
inline std::enable_if_t<is_complex<V>::value, V> readValue(char **linePtr) {
  if constexpr (IsPattern) {
    return V(1.0, 1.0);
  } else {
    const double realPart = strtod(*linePtr, linePtr);
    const double imagPart = strtod(*linePtr, linePtr);
    // Parentheses rather than braces: brace-initialization would reject the
    // narrowing from `double` to `float` components.
    return V(realPart, imagPart);
  }
}

/// Runtime-dispatched variant of `readValue`: forwards to the
/// `IsPattern == true` or `IsPattern == false` instantiation according to
/// the `isPattern` argument.
template <typename V>
inline V readValue(char **linePtr, bool isPattern) {
  if (isPattern)
    return readValue<V, true>(linePtr);
  return readValue<V, false>(linePtr);
}

} // namespace detail
78 
79 //===----------------------------------------------------------------------===//
80 //
81 // Reader class.
82 //
83 //===----------------------------------------------------------------------===//
84 
85 /// This class abstracts over the information stored in file headers,
86 /// as well as providing the buffers and methods for parsing those headers.
87 class SparseTensorReader final {
88 public:
89  enum class ValueKind : uint8_t {
90  // The value before calling `readHeader`.
91  kInvalid = 0,
92  // Values that can be set by `readMMEHeader`.
93  kPattern = 1,
94  kReal = 2,
95  kInteger = 3,
96  kComplex = 4,
97  // The value set by `readExtFROSTTHeader`.
98  kUndefined = 5
99  };
100 
101  explicit SparseTensorReader(const char *filename) : filename(filename) {
102  assert(filename && "Received nullptr for filename");
103  }
104 
105  // Disallows copying, to avoid duplicating the `file` pointer.
108 
109  /// Factory method to allocate a new reader, open the file, read the
110  /// header, and validate that the actual contents of the file match
111  /// the expected `dimShape` and `valTp`.
112  static SparseTensorReader *create(const char *filename, uint64_t dimRank,
113  const uint64_t *dimShape,
114  PrimaryType valTp) {
115  SparseTensorReader *reader = new SparseTensorReader(filename);
116  reader->openFile();
117  reader->readHeader();
118  if (!reader->canReadAs(valTp))
120  "Tensor element type %d not compatible with values in file %s\n",
121  static_cast<int>(valTp), filename);
122  reader->assertMatchesShape(dimRank, dimShape);
123  return reader;
124  }
125 
126  // This dtor tries to avoid leaking the `file`. (Though it's better
127  // to call `closeFile` explicitly when possible, since there are
128  // circumstances where dtors are not called reliably.)
130 
131  /// Opens the file for reading.
132  void openFile();
133 
134  /// Closes the file.
135  void closeFile();
136 
137  /// Reads and parses the file's header.
138  void readHeader();
139 
140  /// Returns the stored value kind.
141  ValueKind getValueKind() const { return valueKind_; }
142 
143  /// Checks if a header has been successfully read.
144  bool isValid() const { return valueKind_ != ValueKind::kInvalid; }
145 
146  /// Checks if the file's ValueKind can be converted into the given
147  /// tensor PrimaryType. Is only valid after parsing the header.
148  bool canReadAs(PrimaryType valTy) const;
149 
150  /// Gets the MME "pattern" property setting. Is only valid after
151  /// parsing the header.
152  bool isPattern() const {
153  assert(isValid() && "Attempt to isPattern() before readHeader()");
154  return valueKind_ == ValueKind::kPattern;
155  }
156 
157  /// Gets the MME "symmetric" property setting. Is only valid after
158  /// parsing the header.
159  bool isSymmetric() const {
160  assert(isValid() && "Attempt to isSymmetric() before readHeader()");
161  return isSymmetric_;
162  }
163 
164  /// Gets the dimension-rank of the tensor. Is only valid after parsing
165  /// the header.
166  uint64_t getRank() const {
167  assert(isValid() && "Attempt to getRank() before readHeader()");
168  return idata[0];
169  }
170 
171  /// Gets the number of stored elements. Is only valid after parsing
172  /// the header.
173  uint64_t getNSE() const {
174  assert(isValid() && "Attempt to getNSE() before readHeader()");
175  return idata[1];
176  }
177 
178  /// Gets the dimension-sizes array. The pointer itself is always
179  /// valid; however, the values stored therein are only valid after
180  /// parsing the header.
181  const uint64_t *getDimSizes() const { return idata + 2; }
182 
183  /// Safely gets the size of the given dimension. Is only valid
184  /// after parsing the header.
185  uint64_t getDimSize(uint64_t d) const {
186  assert(d < getRank() && "Dimension out of bounds");
187  return idata[2 + d];
188  }
189 
190  /// Asserts the shape subsumes the actual dimension sizes. Is only
191  /// valid after parsing the header.
192  void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;
193 
194  /// Allocates a new sparse-tensor storage object with the given encoding,
195  /// initializes it by reading all the elements from the file, and then
196  /// closes the file. Templated on P, I, and V.
197  template <typename P, typename I, typename V>
199  readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
200  const LevelType *lvlTypes, const uint64_t *dim2lvl,
201  const uint64_t *lvl2dim) {
202  const uint64_t dimRank = getRank();
203  MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
204  auto *lvlCOO = readCOO<V>(map, lvlSizes);
206  dimRank, getDimSizes(), lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,
207  *lvlCOO);
208  delete lvlCOO;
209  return tensor;
210  }
211 
212  /// Reads the COO tensor from the file, stores the coordinates and values to
213  /// the given buffers, returns a boolean value to indicate whether the COO
214  /// elements are sorted.
215  template <typename C, typename V>
216  bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
217  const uint64_t *lvl2dim, C *lvlCoordinates, V *values);
218 
219 private:
220  /// Attempts to read a line from the file.
221  void readLine();
222 
223  /// Reads the next line of the input file and parses the coordinates
224  /// into the `dimCoords` argument. Returns the position in the `line`
225  /// buffer where the element's value should be parsed from.
226  template <typename C>
227  char *readCoords(C *dimCoords) {
228  readLine();
229  // Local variable for tracking the parser's position in the `line` buffer.
230  char *linePtr = line;
231  for (uint64_t dimRank = getRank(), d = 0; d < dimRank; ++d) {
232  // Parse the 1-based coordinate.
233  uint64_t c = strtoul(linePtr, &linePtr, 10);
234  // Store the 0-based coordinate.
235  dimCoords[d] = static_cast<C>(c - 1);
236  }
237  return linePtr;
238  }
239 
240  /// Reads all the elements from the file while applying the given map.
241  template <typename V>
242  SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
243 
244  /// The implementation of `readCOO` that is templated `IsPattern` in order
245  /// to perform LICM without needing to duplicate the source code.
246  template <typename V, bool IsPattern>
247  void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);
248 
249  /// The internal implementation of `readToBuffers`. We template over
250  /// `IsPattern` in order to perform LICM without needing to duplicate
251  /// the source code.
252  template <typename C, typename V, bool IsPattern>
253  bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);
254 
255  /// Reads the MME header of a general sparse matrix of type real.
256  void readMMEHeader();
257 
258  /// Reads the "extended" FROSTT header. Although not part of the
259  /// documented format, we assume that the file starts with optional
260  /// comments followed by two lines that define the rank, the number of
261  /// nonzeros, and the dimensions sizes (one per rank) of the sparse tensor.
262  void readExtFROSTTHeader();
263 
264  static constexpr int kColWidth = 1025;
265  const char *const filename;
266  FILE *file = nullptr;
267  ValueKind valueKind_ = ValueKind::kInvalid;
268  bool isSymmetric_ = false;
269  uint64_t idata[512];
270  char line[kColWidth];
271 };
272 
273 //===----------------------------------------------------------------------===//
274 //
275 // Reader class methods.
276 //
277 //===----------------------------------------------------------------------===//
278 
279 template <typename V>
280 SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
281  const uint64_t *lvlSizes) {
282  assert(isValid() && "Attempt to readCOO() before readHeader()");
283  // Prepare a COO object with the number of stored elems as initial capacity.
284  auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
285  // Enter the reading loop.
286  if (isPattern())
287  readCOOLoop<V, true>(map, coo);
288  else
289  readCOOLoop<V, false>(map, coo);
290  // Close the file and return the COO.
291  closeFile();
292  return coo;
293 }
294 
295 template <typename V, bool IsPattern>
296 void SparseTensorReader::readCOOLoop(const MapRef &map,
297  SparseTensorCOO<V> *coo) {
298  const uint64_t dimRank = map.getDimRank();
299  const uint64_t lvlRank = map.getLvlRank();
300  assert(dimRank == getRank());
301  std::vector<uint64_t> dimCoords(dimRank);
302  std::vector<uint64_t> lvlCoords(lvlRank);
303  for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
304  char *linePtr = readCoords(dimCoords.data());
305  const V value = detail::readValue<V, IsPattern>(&linePtr);
306  map.pushforward(dimCoords.data(), lvlCoords.data());
307  coo->add(lvlCoords, value);
308  }
309 }
310 
311 template <typename C, typename V>
312 bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
313  const uint64_t *dim2lvl,
314  const uint64_t *lvl2dim,
315  C *lvlCoordinates, V *values) {
316  assert(isValid() && "Attempt to readCOO() before readHeader()");
317  MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
318  bool isSorted =
319  isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
320  : readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
321  closeFile();
322  return isSorted;
323 }
324 
325 template <typename C, typename V, bool IsPattern>
326 bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
327  V *values) {
328  const uint64_t dimRank = map.getDimRank();
329  const uint64_t lvlRank = map.getLvlRank();
330  const uint64_t nse = getNSE();
331  assert(dimRank == getRank());
332  std::vector<C> dimCoords(dimRank);
333  bool isSorted = false;
334  char *linePtr;
335  const auto readNextElement = [&]() {
336  linePtr = readCoords<C>(dimCoords.data());
337  map.pushforward(dimCoords.data(), lvlCoordinates);
338  *values = detail::readValue<V, IsPattern>(&linePtr);
339  if (isSorted) {
340  // Note that isSorted is set to false when reading the first element,
341  // to guarantee the safeness of using prevLvlCoords.
342  C *prevLvlCoords = lvlCoordinates - lvlRank;
343  for (uint64_t l = 0; l < lvlRank; ++l) {
344  if (prevLvlCoords[l] != lvlCoordinates[l]) {
345  if (prevLvlCoords[l] > lvlCoordinates[l])
346  isSorted = false;
347  break;
348  }
349  }
350  }
351  lvlCoordinates += lvlRank;
352  ++values;
353  };
354  readNextElement();
355  isSorted = true;
356  for (uint64_t n = 1; n < nse; ++n)
357  readNextElement();
358  return isSorted;
359 }
360 
361 } // namespace sparse_tensor
362 } // namespace mlir
363 
364 #endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
#define MLIR_SPARSETENSOR_FATAL(...)
This macro helps minimize repetition of the printf-and-exit idiom, as well as ensuring that we print ...
Definition: ErrorHandling.h:27
A class for capturing the sparse tensor type map with a compact encoding.
Definition: MapRef.h:33
void pushforward(const T *in, T *out) const
Definition: MapRef.h:43
uint64_t getLvlRank() const
Definition: MapRef.h:83
uint64_t getDimRank() const
Definition: MapRef.h:82
This class abstracts over the information stored in file headers, as well as providing the buffers an...
Definition: File.h:87
void assertMatchesShape(uint64_t rank, const uint64_t *shape) const
Asserts the shape subsumes the actual dimension sizes.
Definition: File.cpp:57
bool isPattern() const
Gets the MME "pattern" property setting.
Definition: File.h:152
void closeFile()
Closes the file.
Definition: File.cpp:30
SparseTensorReader & operator=(const SparseTensorReader &)=delete
uint64_t getDimSize(uint64_t d) const
Safely gets the size of the given dimension.
Definition: File.h:185
SparseTensorReader(const SparseTensorReader &)=delete
void readHeader()
Reads and parses the file's header.
Definition: File.cpp:44
const uint64_t * getDimSizes() const
Gets the dimension-sizes array.
Definition: File.h:181
bool canReadAs(PrimaryType valTy) const
Checks if the file's ValueKind can be converted into the given tensor PrimaryType.
Definition: File.cpp:65
uint64_t getNSE() const
Gets the number of stored elements.
Definition: File.h:173
bool isValid() const
Checks if a header has been successfully read.
Definition: File.h:144
ValueKind getValueKind() const
Returns the stored value kind.
Definition: File.h:141
SparseTensorStorage< P, I, V > * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim)
Allocates a new sparse-tensor storage object with the given encoding, initializes it by reading all t...
Definition: File.h:199
bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, const uint64_t *lvl2dim, C *lvlCoordinates, V *values)
Reads the COO tensor from the file, stores the coordinates and values to the given buffers,...
Definition: File.h:312
bool isSymmetric() const
Gets the MME "symmetric" property setting.
Definition: File.h:159
static SparseTensorReader * create(const char *filename, uint64_t dimRank, const uint64_t *dimShape, PrimaryType valTp)
Factory method to allocate a new reader, open the file, read the header, and validate that the actual...
Definition: File.h:112
uint64_t getRank() const
Gets the dimension-rank of the tensor.
Definition: File.h:166
SparseTensorReader(const char *filename)
Definition: File.h:101
void openFile()
Opens the file for reading.
Definition: File.cpp:21
A memory-resident sparse tensor using a storage scheme based on per-level sparse/dense annotations.
Definition: Storage.h:199
static SparseTensorStorage< P, C, V > * newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO< V > &lvlCOO)
Allocates a new sparse tensor and initializes it from the given COO.
Definition: Storage.h:668
std::enable_if_t<!is_complex< V >::value, V > readValue(char **linePtr)
Returns an element-value of non-complex type.
Definition: File.h:43
PrimaryType
Encoding of the elemental type, for "overloading" @newSparseTensor.
Definition: Enums.h:81
LevelType
This enum defines all the sparse representations supportable by the SparseTensor dialect.
Definition: Enums.h:168
Include the generated interface declarations.