MLIR  19.0.0git
File.h
Go to the documentation of this file.
1 //===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements reading sparse tensor from files in one of the
10 // following external formats:
11 //
12 // (1) Matrix Market Exchange (MME): *.mtx
13 // https://math.nist.gov/MatrixMarket/formats.html
14 //
15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
16 // http://frostt.io/tensors/file-formats.html
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
21 #define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
22 
25 
26 #include <fstream>
27 
28 namespace mlir {
29 namespace sparse_tensor {
30 
31 namespace detail {
32 
33 template <typename T>
34 struct is_complex final : public std::false_type {};
35 
36 template <typename T>
37 struct is_complex<std::complex<T>> final : public std::true_type {};
38 
39 /// Returns an element-value of non-complex type. If `IsPattern` is true,
40 /// then returns an arbitrary value. If `IsPattern` is false, then
41 /// reads the value from the current line buffer beginning at `linePtr`.
42 template <typename V, bool IsPattern>
43 inline std::enable_if_t<!is_complex<V>::value, V> readValue(char **linePtr) {
44  // The external formats always store these numerical values with the type
45  // double, but we cast these values to the sparse tensor object type.
46  // For a pattern tensor, we arbitrarily pick the value 1 for all entries.
47  if constexpr (IsPattern)
48  return 1.0;
49  return strtod(*linePtr, linePtr);
50 }
51 
52 /// Returns an element-value of complex type. If `IsPattern` is true,
53 /// then returns an arbitrary value. If `IsPattern` is false, then reads
54 /// the value from the current line buffer beginning at `linePtr`.
55 template <typename V, bool IsPattern>
56 inline std::enable_if_t<is_complex<V>::value, V> readValue(char **linePtr) {
57  // Read two values to make a complex. The external formats always store
58  // numerical values with the type double, but we cast these values to the
59  // sparse tensor object type. For a pattern tensor, we arbitrarily pick the
60  // value 1 for all entries.
61  if constexpr (IsPattern)
62  return V(1.0, 1.0);
63  double re = strtod(*linePtr, linePtr);
64  double im = strtod(*linePtr, linePtr);
65  // Avoiding brace-notation since that forbids narrowing to `float`.
66  return V(re, im);
67 }
68 
69 /// Returns an element-value. If `isPattern` is true, then returns an
70 /// arbitrary value. If `isPattern` is false, then reads the value from
71 /// the current line buffer beginning at `linePtr`.
72 template <typename V>
73 inline V readValue(char **linePtr, bool isPattern) {
74  return isPattern ? readValue<V, true>(linePtr) : readValue<V, false>(linePtr);
75 }
76 
77 } // namespace detail
78 
79 //===----------------------------------------------------------------------===//
80 //
81 // Reader class.
82 //
83 //===----------------------------------------------------------------------===//
84 
85 /// This class abstracts over the information stored in file headers,
86 /// as well as providing the buffers and methods for parsing those headers.
87 class SparseTensorReader final {
88 public:
89  enum class ValueKind : uint8_t {
90  // The value before calling `readHeader`.
91  kInvalid = 0,
92  // Values that can be set by `readMMEHeader`.
93  kPattern = 1,
94  kReal = 2,
95  kInteger = 3,
96  kComplex = 4,
97  // The value set by `readExtFROSTTHeader`.
98  kUndefined = 5
99  };
100 
101  explicit SparseTensorReader(const char *filename) : filename(filename) {
102  assert(filename && "Received nullptr for filename");
103  }
104 
105  // Disallows copying, to avoid duplicating the `file` pointer.
108 
109  /// Factory method to allocate a new reader, open the file, read the
110  /// header, and validate that the actual contents of the file match
111  /// the expected `dimShape` and `valTp`.
112  static SparseTensorReader *create(const char *filename, uint64_t dimRank,
113  const uint64_t *dimShape,
114  PrimaryType valTp) {
115  SparseTensorReader *reader = new SparseTensorReader(filename);
116  reader->openFile();
117  reader->readHeader();
118  if (!reader->canReadAs(valTp)) {
119  fprintf(stderr,
120  "Tensor element type %d not compatible with values in file %s\n",
121  static_cast<int>(valTp), filename);
122  exit(1);
123  }
124  reader->assertMatchesShape(dimRank, dimShape);
125  return reader;
126  }
127 
128  // This dtor tries to avoid leaking the `file`. (Though it's better
129  // to call `closeFile` explicitly when possible, since there are
130  // circumstances where dtors are not called reliably.)
132 
133  /// Opens the file for reading.
134  void openFile();
135 
136  /// Closes the file.
137  void closeFile();
138 
139  /// Reads and parses the file's header.
140  void readHeader();
141 
142  /// Returns the stored value kind.
143  ValueKind getValueKind() const { return valueKind_; }
144 
145  /// Checks if a header has been successfully read.
146  bool isValid() const { return valueKind_ != ValueKind::kInvalid; }
147 
148  /// Checks if the file's ValueKind can be converted into the given
149  /// tensor PrimaryType. Is only valid after parsing the header.
150  bool canReadAs(PrimaryType valTy) const;
151 
152  /// Gets the MME "pattern" property setting. Is only valid after
153  /// parsing the header.
154  bool isPattern() const {
155  assert(isValid() && "Attempt to isPattern() before readHeader()");
156  return valueKind_ == ValueKind::kPattern;
157  }
158 
159  /// Gets the MME "symmetric" property setting. Is only valid after
160  /// parsing the header.
161  bool isSymmetric() const {
162  assert(isValid() && "Attempt to isSymmetric() before readHeader()");
163  return isSymmetric_;
164  }
165 
166  /// Gets the dimension-rank of the tensor. Is only valid after parsing
167  /// the header.
168  uint64_t getRank() const {
169  assert(isValid() && "Attempt to getRank() before readHeader()");
170  return idata[0];
171  }
172 
173  /// Gets the number of stored elements. Is only valid after parsing
174  /// the header.
175  uint64_t getNSE() const {
176  assert(isValid() && "Attempt to getNSE() before readHeader()");
177  return idata[1];
178  }
179 
180  /// Gets the dimension-sizes array. The pointer itself is always
181  /// valid; however, the values stored therein are only valid after
182  /// parsing the header.
183  const uint64_t *getDimSizes() const { return idata + 2; }
184 
185  /// Safely gets the size of the given dimension. Is only valid
186  /// after parsing the header.
187  uint64_t getDimSize(uint64_t d) const {
188  assert(d < getRank() && "Dimension out of bounds");
189  return idata[2 + d];
190  }
191 
192  /// Asserts the shape subsumes the actual dimension sizes. Is only
193  /// valid after parsing the header.
194  void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;
195 
196  /// Allocates a new sparse-tensor storage object with the given encoding,
197  /// initializes it by reading all the elements from the file, and then
198  /// closes the file. Templated on P, I, and V.
199  template <typename P, typename I, typename V>
201  readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
202  const LevelType *lvlTypes, const uint64_t *dim2lvl,
203  const uint64_t *lvl2dim) {
204  const uint64_t dimRank = getRank();
205  MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
206  auto *lvlCOO = readCOO<V>(map, lvlSizes);
208  dimRank, getDimSizes(), lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,
209  lvlCOO);
210  delete lvlCOO;
211  return tensor;
212  }
213 
214  /// Reads the COO tensor from the file, stores the coordinates and values to
215  /// the given buffers, returns a boolean value to indicate whether the COO
216  /// elements are sorted.
217  template <typename C, typename V>
218  bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
219  const uint64_t *lvl2dim, C *lvlCoordinates, V *values);
220 
221 private:
222  /// Attempts to read a line from the file.
223  void readLine();
224 
225  /// Reads the next line of the input file and parses the coordinates
226  /// into the `dimCoords` argument. Returns the position in the `line`
227  /// buffer where the element's value should be parsed from.
228  template <typename C>
229  char *readCoords(C *dimCoords) {
230  readLine();
231  // Local variable for tracking the parser's position in the `line` buffer.
232  char *linePtr = line;
233  for (uint64_t dimRank = getRank(), d = 0; d < dimRank; ++d) {
234  // Parse the 1-based coordinate.
235  uint64_t c = strtoul(linePtr, &linePtr, 10);
236  // Store the 0-based coordinate.
237  dimCoords[d] = static_cast<C>(c - 1);
238  }
239  return linePtr;
240  }
241 
242  /// Reads all the elements from the file while applying the given map.
243  template <typename V>
244  SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
245 
246  /// The implementation of `readCOO` that is templated `IsPattern` in order
247  /// to perform LICM without needing to duplicate the source code.
248  template <typename V, bool IsPattern>
249  void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);
250 
251  /// The internal implementation of `readToBuffers`. We template over
252  /// `IsPattern` in order to perform LICM without needing to duplicate
253  /// the source code.
254  template <typename C, typename V, bool IsPattern>
255  bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);
256 
257  /// Reads the MME header of a general sparse matrix of type real.
258  void readMMEHeader();
259 
260  /// Reads the "extended" FROSTT header. Although not part of the
261  /// documented format, we assume that the file starts with optional
262  /// comments followed by two lines that define the rank, the number of
263  /// nonzeros, and the dimensions sizes (one per rank) of the sparse tensor.
264  void readExtFROSTTHeader();
265 
266  static constexpr int kColWidth = 1025;
267  const char *const filename;
268  FILE *file = nullptr;
269  ValueKind valueKind_ = ValueKind::kInvalid;
270  bool isSymmetric_ = false;
271  uint64_t idata[512];
272  char line[kColWidth];
273 };
274 
275 //===----------------------------------------------------------------------===//
276 //
277 // Reader class methods.
278 //
279 //===----------------------------------------------------------------------===//
280 
281 template <typename V>
282 SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
283  const uint64_t *lvlSizes) {
284  assert(isValid() && "Attempt to readCOO() before readHeader()");
285  // Prepare a COO object with the number of stored elems as initial capacity.
286  auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
287  // Enter the reading loop.
288  if (isPattern())
289  readCOOLoop<V, true>(map, coo);
290  else
291  readCOOLoop<V, false>(map, coo);
292  // Close the file and return the COO.
293  closeFile();
294  return coo;
295 }
296 
297 template <typename V, bool IsPattern>
298 void SparseTensorReader::readCOOLoop(const MapRef &map,
299  SparseTensorCOO<V> *coo) {
300  const uint64_t dimRank = map.getDimRank();
301  const uint64_t lvlRank = map.getLvlRank();
302  assert(dimRank == getRank());
303  std::vector<uint64_t> dimCoords(dimRank);
304  std::vector<uint64_t> lvlCoords(lvlRank);
305  for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
306  char *linePtr = readCoords(dimCoords.data());
307  const V value = detail::readValue<V, IsPattern>(&linePtr);
308  map.pushforward(dimCoords.data(), lvlCoords.data());
309  coo->add(lvlCoords, value);
310  }
311 }
312 
313 template <typename C, typename V>
314 bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
315  const uint64_t *dim2lvl,
316  const uint64_t *lvl2dim,
317  C *lvlCoordinates, V *values) {
318  assert(isValid() && "Attempt to readCOO() before readHeader()");
319  MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
320  bool isSorted =
321  isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
322  : readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
323  closeFile();
324  return isSorted;
325 }
326 
327 template <typename C, typename V, bool IsPattern>
328 bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
329  V *values) {
330  const uint64_t dimRank = map.getDimRank();
331  const uint64_t lvlRank = map.getLvlRank();
332  const uint64_t nse = getNSE();
333  assert(dimRank == getRank());
334  std::vector<C> dimCoords(dimRank);
335  bool isSorted = false;
336  char *linePtr;
337  const auto readNextElement = [&]() {
338  linePtr = readCoords<C>(dimCoords.data());
339  map.pushforward(dimCoords.data(), lvlCoordinates);
340  *values = detail::readValue<V, IsPattern>(&linePtr);
341  if (isSorted) {
342  // Note that isSorted is set to false when reading the first element,
343  // to guarantee the safeness of using prevLvlCoords.
344  C *prevLvlCoords = lvlCoordinates - lvlRank;
345  for (uint64_t l = 0; l < lvlRank; ++l) {
346  if (prevLvlCoords[l] != lvlCoordinates[l]) {
347  if (prevLvlCoords[l] > lvlCoordinates[l])
348  isSorted = false;
349  break;
350  }
351  }
352  }
353  lvlCoordinates += lvlRank;
354  ++values;
355  };
356  readNextElement();
357  isSorted = true;
358  for (uint64_t n = 1; n < nse; ++n)
359  readNextElement();
360  return isSorted;
361 }
362 
363 } // namespace sparse_tensor
364 } // namespace mlir
365 
366 #endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
A class for capturing the sparse tensor type map with a compact encoding.
Definition: MapRef.h:33
void pushforward(const T *in, T *out) const
Definition: MapRef.h:43
uint64_t getLvlRank() const
Definition: MapRef.h:83
uint64_t getDimRank() const
Definition: MapRef.h:82
This class abstracts over the information stored in file headers, as well as providing the buffers an...
Definition: File.h:87
void assertMatchesShape(uint64_t rank, const uint64_t *shape) const
Asserts the shape subsumes the actual dimension sizes.
Definition: File.cpp:65
bool isPattern() const
Gets the MME "pattern" property setting.
Definition: File.h:154
void closeFile()
Closes the file.
Definition: File.cpp:34
SparseTensorReader & operator=(const SparseTensorReader &)=delete
uint64_t getDimSize(uint64_t d) const
Safely gets the size of the given dimension.
Definition: File.h:187
SparseTensorReader(const SparseTensorReader &)=delete
void readHeader()
Reads and parses the file's header.
Definition: File.cpp:50
const uint64_t * getDimSizes() const
Gets the dimension-sizes array.
Definition: File.h:183
bool canReadAs(PrimaryType valTy) const
Checks if the file's ValueKind can be converted into the given tensor PrimaryType.
Definition: File.cpp:73
uint64_t getNSE() const
Gets the number of stored elements.
Definition: File.h:175
bool isValid() const
Checks if a header has been successfully read.
Definition: File.h:146
ValueKind getValueKind() const
Returns the stored value kind.
Definition: File.h:143
SparseTensorStorage< P, I, V > * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim)
Allocates a new sparse-tensor storage object with the given encoding, initializes it by reading all t...
Definition: File.h:201
bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, const uint64_t *lvl2dim, C *lvlCoordinates, V *values)
Reads the COO tensor from the file, stores the coordinates and values to the given buffers,...
Definition: File.h:314
bool isSymmetric() const
Gets the MME "symmetric" property setting.
Definition: File.h:161
static SparseTensorReader * create(const char *filename, uint64_t dimRank, const uint64_t *dimShape, PrimaryType valTp)
Factory method to allocate a new reader, open the file, read the header, and validate that the actual...
Definition: File.h:112
uint64_t getRank() const
Gets the dimension-rank of the tensor.
Definition: File.h:168
SparseTensorReader(const char *filename)
Definition: File.h:101
void openFile()
Opens the file for reading.
Definition: File.cpp:21
A memory-resident sparse tensor using a storage scheme based on per-level sparse/dense annotations.
Definition: Storage.h:195
static SparseTensorStorage< P, C, V > * newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO< V > *lvlCOO)
Allocates a new sparse tensor and initializes it from the given COO.
Definition: Storage.h:590
std::enable_if_t<!is_complex< V >::value, V > readValue(char **linePtr)
Returns an element-value of non-complex type.
Definition: File.h:43
PrimaryType
Encoding of the elemental type, for "overloading" @newSparseTensor.
Definition: Enums.h:82
Include the generated interface declarations.
This enum defines all the sparse representations supportable by the SparseTensor dialect.
Definition: Enums.h:238