MLIR 23.0.0git
File.h
Go to the documentation of this file.
1//===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements reading sparse tensor from files in one of the
10// following external formats:
11//
12// (1) Matrix Market Exchange (MME): *.mtx
13// https://math.nist.gov/MatrixMarket/formats.html
14//
15// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
16// http://frostt.io/tensors/file-formats.html
17//
18//===----------------------------------------------------------------------===//
19
20#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
21#define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
22
26
27#include <fstream>
28
29namespace mlir {
30namespace sparse_tensor {
31
32namespace detail {
33
/// Type trait that is true exactly for the complex element types handled by
/// the value readers below. The primary template covers every non-complex
/// type.
template <typename T>
struct is_complex final : public std::false_type {};

/// `std::complex<T>` is a complex element type.
template <typename T>
struct is_complex<std::complex<T>> final : public std::true_type {};
39
40template <typename T>
41struct is_complex<mlir::NonFloatComplex<T>> final : public std::true_type {};
42
43/// Returns an element-value of non-complex type. If `IsPattern` is true,
44/// then returns an arbitrary value. If `IsPattern` is false, then
45/// reads the value from the current line buffer beginning at `linePtr`.
46template <typename V, bool IsPattern>
47inline std::enable_if_t<!is_complex<V>::value, V> readValue(char **linePtr) {
48 // The external formats always store these numerical values with the type
49 // double, but we cast these values to the sparse tensor object type.
50 // For a pattern tensor, we arbitrarily pick the value 1 for all entries.
51 if constexpr (IsPattern)
52 return 1.0;
53 return strtod(*linePtr, linePtr);
54}
55
56/// Returns an element-value of complex type. If `IsPattern` is true,
57/// then returns an arbitrary value. If `IsPattern` is false, then reads
58/// the value from the current line buffer beginning at `linePtr`.
59template <typename V, bool IsPattern>
60inline std::enable_if_t<is_complex<V>::value, V> readValue(char **linePtr) {
61 // Read two values to make a complex. The external formats always store
62 // numerical values with the type double, but we cast these values to the
63 // sparse tensor object type. For a pattern tensor, we arbitrarily pick the
64 // value 1 for all entries.
65 if constexpr (IsPattern)
66 return V(1.0, 1.0);
67 double re = strtod(*linePtr, linePtr);
68 double im = strtod(*linePtr, linePtr);
69 // Avoiding brace-notation since that forbids narrowing to `float`.
70 return V(re, im);
71}
72
/// Returns an element-value. If `isPattern` is true, then returns an
/// arbitrary value. If `isPattern` is false, then reads the value from
/// the current line buffer beginning at `linePtr`.
///
/// This is the runtime-dispatching front end for the `IsPattern`-templated
/// overloads above.
template <typename V>
inline V readValue(char **linePtr, bool isPattern) {
  if (isPattern)
    return readValue<V, true>(linePtr);
  return readValue<V, false>(linePtr);
}
80
81} // namespace detail
82
83//===----------------------------------------------------------------------===//
84//
85// Reader class.
86//
87//===----------------------------------------------------------------------===//
88
89/// This class abstracts over the information stored in file headers,
90/// as well as providing the buffers and methods for parsing those headers.
91class SparseTensorReader final {
92public:
93 enum class ValueKind : uint8_t {
94 // The value before calling `readHeader`.
96 // Values that can be set by `readMMEHeader`.
98 kReal = 2,
101 // The value set by `readExtFROSTTHeader`.
103 };
104
105 explicit SparseTensorReader(const char *filename) : filename(filename) {
106 assert(filename && "Received nullptr for filename");
107 }
108
109 // Disallows copying, to avoid duplicating the `file` pointer.
112
113 /// Factory method to allocate a new reader, open the file, read the
114 /// header, and validate that the actual contents of the file match
115 /// the expected `dimShape` and `valTp`.
116 static SparseTensorReader *create(const char *filename, uint64_t dimRank,
117 const uint64_t *dimShape,
118 PrimaryType valTp) {
119 SparseTensorReader *reader = new SparseTensorReader(filename);
120 reader->openFile();
121 reader->readHeader();
122 if (!reader->canReadAs(valTp)) {
123 fprintf(stderr,
124 "Tensor element type %d not compatible with values in file %s\n",
125 static_cast<int>(valTp), filename);
126 exit(1);
127 }
128 reader->assertMatchesShape(dimRank, dimShape);
129 return reader;
130 }
131
132 // This dtor tries to avoid leaking the `file`. (Though it's better
133 // to call `closeFile` explicitly when possible, since there are
134 // circumstances where dtors are not called reliably.)
136
137 /// Opens the file for reading.
138 void openFile();
139
140 /// Closes the file.
141 void closeFile();
142
143 /// Reads and parses the file's header.
144 void readHeader();
145
146 /// Returns the stored value kind.
147 ValueKind getValueKind() const { return valueKind_; }
148
149 /// Checks if a header has been successfully read.
150 bool isValid() const { return valueKind_ != ValueKind::kInvalid; }
151
152 /// Checks if the file's ValueKind can be converted into the given
153 /// tensor PrimaryType. Is only valid after parsing the header.
154 bool canReadAs(PrimaryType valTy) const;
155
156 /// Gets the MME "pattern" property setting. Is only valid after
157 /// parsing the header.
158 bool isPattern() const {
159 assert(isValid() && "Attempt to isPattern() before readHeader()");
160 return valueKind_ == ValueKind::kPattern;
161 }
162
163 /// Gets the MME "symmetric" property setting. Is only valid after
164 /// parsing the header.
165 bool isSymmetric() const {
166 assert(isValid() && "Attempt to isSymmetric() before readHeader()");
167 return isSymmetric_;
168 }
169
170 /// Gets the dimension-rank of the tensor. Is only valid after parsing
171 /// the header.
172 uint64_t getRank() const {
173 assert(isValid() && "Attempt to getRank() before readHeader()");
174 return idata[0];
175 }
176
177 /// Gets the number of stored elements. Is only valid after parsing
178 /// the header.
179 uint64_t getNSE() const {
180 assert(isValid() && "Attempt to getNSE() before readHeader()");
181 return idata[1];
182 }
183
184 /// Gets the dimension-sizes array. The pointer itself is always
185 /// valid; however, the values stored therein are only valid after
186 /// parsing the header.
187 const uint64_t *getDimSizes() const { return idata + 2; }
188
189 /// Safely gets the size of the given dimension. Is only valid
190 /// after parsing the header.
191 uint64_t getDimSize(uint64_t d) const {
192 assert(d < getRank() && "Dimension out of bounds");
193 return idata[2 + d];
194 }
195
196 /// Asserts the shape subsumes the actual dimension sizes. Is only
197 /// valid after parsing the header.
198 void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;
199
200 /// Allocates a new sparse-tensor storage object with the given encoding,
201 /// initializes it by reading all the elements from the file, and then
202 /// closes the file. Templated on P, C, and V.
203 template <typename P, typename C, typename V>
205 readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
206 const LevelType *lvlTypes, const uint64_t *dim2lvl,
207 const uint64_t *lvl2dim) {
208 const uint64_t dimRank = getRank();
209 MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
210 auto *lvlCOO = readCOO<V>(map, lvlSizes);
212 dimRank, getDimSizes(), lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,
213 lvlCOO);
214 delete lvlCOO;
215 return tensor;
216 }
217
218 /// Reads the COO tensor from the file, stores the coordinates and values to
219 /// the given buffers, returns a boolean value to indicate whether the COO
220 /// elements are sorted.
221 template <typename C, typename V>
222 bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
223 const uint64_t *lvl2dim, C *lvlCoordinates, V *values);
224
225private:
226 /// Attempts to read a line from the file.
227 void readLine();
228
229 /// Reads the next line of the input file and parses the coordinates
230 /// into the `dimCoords` argument. Returns the position in the `line`
231 /// buffer where the element's value should be parsed from.
232 template <typename C>
233 char *readCoords(C *dimCoords) {
234 readLine();
235 // Local variable for tracking the parser's position in the `line` buffer.
236 char *linePtr = line;
237 for (uint64_t dimRank = getRank(), d = 0; d < dimRank; ++d) {
238 // Parse the 1-based coordinate.
239 uint64_t c = strtoul(linePtr, &linePtr, 10);
240 // Store the 0-based coordinate.
241 dimCoords[d] = static_cast<C>(c - 1);
242 }
243 return linePtr;
244 }
245
246 /// Reads all the elements from the file while applying the given map.
247 template <typename V>
248 SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
249
250 /// The implementation of `readCOO` that is templated `IsPattern` in order
251 /// to perform LICM without needing to duplicate the source code.
252 template <typename V, bool IsPattern>
253 void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);
254
255 /// The internal implementation of `readToBuffers`. We template over
256 /// `IsPattern` in order to perform LICM without needing to duplicate
257 /// the source code.
258 template <typename C, typename V, bool IsPattern>
259 bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);
260
261 /// Reads the MME header of a general sparse matrix of type real.
262 void readMMEHeader();
263
264 /// Reads the "extended" FROSTT header. Although not part of the
265 /// documented format, we assume that the file starts with optional
266 /// comments followed by two lines that define the rank, the number of
267 /// nonzeros, and the dimensions sizes (one per rank) of the sparse tensor.
268 void readExtFROSTTHeader();
269
270 static constexpr int kColWidth = 1025;
271 const char *const filename;
272 FILE *file = nullptr;
273 ValueKind valueKind_ = ValueKind::kInvalid;
274 bool isSymmetric_ = false;
275 uint64_t idata[512];
276 char line[kColWidth];
277};
278
279//===----------------------------------------------------------------------===//
280//
281// Reader class methods.
282//
283//===----------------------------------------------------------------------===//
284
285template <typename V>
286SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
287 const uint64_t *lvlSizes) {
288 assert(isValid() && "Attempt to readCOO() before readHeader()");
289 // Prepare a COO object with the number of stored elems as initial capacity.
290 auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
291 // Enter the reading loop.
292 if (isPattern())
293 readCOOLoop<V, true>(map, coo);
294 else
295 readCOOLoop<V, false>(map, coo);
296 // Close the file and return the COO.
297 closeFile();
298 return coo;
299}
300
301template <typename V, bool IsPattern>
302void SparseTensorReader::readCOOLoop(const MapRef &map,
303 SparseTensorCOO<V> *coo) {
304 const uint64_t dimRank = map.getDimRank();
305 const uint64_t lvlRank = map.getLvlRank();
306 assert(dimRank == getRank());
307 std::vector<uint64_t> dimCoords(dimRank);
308 std::vector<uint64_t> lvlCoords(lvlRank);
309 for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
310 char *linePtr = readCoords(dimCoords.data());
311 const V value = detail::readValue<V, IsPattern>(&linePtr);
312 map.pushforward(dimCoords.data(), lvlCoords.data());
313 coo->add(lvlCoords, value);
314 }
315}
316
317template <typename C, typename V>
319 const uint64_t *dim2lvl,
320 const uint64_t *lvl2dim,
321 C *lvlCoordinates, V *values) {
322 assert(isValid() && "Attempt to readCOO() before readHeader()");
323 MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
324 bool isSorted =
325 isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
326 : readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
327 closeFile();
328 return isSorted;
329}
330
331template <typename C, typename V, bool IsPattern>
332bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
333 V *values) {
334 const uint64_t dimRank = map.getDimRank();
335 const uint64_t lvlRank = map.getLvlRank();
336 const uint64_t nse = getNSE();
337 assert(dimRank == getRank());
338 std::vector<C> dimCoords(dimRank);
339 bool isSorted = false;
340 char *linePtr;
341 const auto readNextElement = [&]() {
342 linePtr = readCoords<C>(dimCoords.data());
343 map.pushforward(dimCoords.data(), lvlCoordinates);
344 *values = detail::readValue<V, IsPattern>(&linePtr);
345 if (isSorted) {
346 // Note that isSorted is set to false when reading the first element,
347 // to guarantee the safeness of using prevLvlCoords.
348 C *prevLvlCoords = lvlCoordinates - lvlRank;
349 for (uint64_t l = 0; l < lvlRank; ++l) {
350 if (prevLvlCoords[l] != lvlCoordinates[l]) {
351 if (prevLvlCoords[l] > lvlCoordinates[l])
352 isSorted = false;
353 break;
354 }
355 }
356 }
357 lvlCoordinates += lvlRank;
358 ++values;
359 };
360 readNextElement();
361 isSorted = true;
362 for (uint64_t n = 1; n < nse; ++n)
363 readNextElement();
364 return isSorted;
365}
366
367} // namespace sparse_tensor
368} // namespace mlir
369
370#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
A class for capturing the sparse tensor type map with a compact encoding.
Definition MapRef.h:32
void pushforward(const T *in, T *out) const
Definition MapRef.h:42
uint64_t getLvlRank() const
Definition MapRef.h:82
uint64_t getDimRank() const
Definition MapRef.h:81
A memory-resident sparse tensor in coordinate-scheme representation (a collection of Elements).
Definition COO.h:66
void assertMatchesShape(uint64_t rank, const uint64_t *shape) const
Asserts the shape subsumes the actual dimension sizes.
Definition File.cpp:65
bool isPattern() const
Gets the MME "pattern" property setting.
Definition File.h:158
void closeFile()
Closes the file.
Definition File.cpp:34
SparseTensorStorage< P, C, V > * readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim)
Allocates a new sparse-tensor storage object with the given encoding, initializes it by reading all t...
Definition File.h:205
uint64_t getDimSize(uint64_t d) const
Safely gets the size of the given dimension.
Definition File.h:191
SparseTensorReader(const SparseTensorReader &)=delete
void readHeader()
Reads and parses the file's header.
Definition File.cpp:50
bool canReadAs(PrimaryType valTy) const
Checks if the file's ValueKind can be converted into the given tensor PrimaryType.
Definition File.cpp:73
uint64_t getNSE() const
Gets the number of stored elements.
Definition File.h:179
bool isValid() const
Checks if a header has been successfully read.
Definition File.h:150
ValueKind getValueKind() const
Returns the stored value kind.
Definition File.h:147
const uint64_t * getDimSizes() const
Gets the dimension-sizes array.
Definition File.h:187
bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl, const uint64_t *lvl2dim, C *lvlCoordinates, V *values)
Reads the COO tensor from the file, stores the coordinates and values to the given buffers,...
Definition File.h:318
bool isSymmetric() const
Gets the MME "symmetric" property setting.
Definition File.h:165
SparseTensorReader & operator=(const SparseTensorReader &)=delete
uint64_t getRank() const
Gets the dimension-rank of the tensor.
Definition File.h:172
static SparseTensorReader * create(const char *filename, uint64_t dimRank, const uint64_t *dimShape, PrimaryType valTp)
Factory method to allocate a new reader, open the file, read the header, and validate that the actual...
Definition File.h:116
SparseTensorReader(const char *filename)
Definition File.h:105
void openFile()
Opens the file for reading.
Definition File.cpp:21
A memory-resident sparse tensor using a storage scheme based on per-level sparse/dense annotations.
Definition Storage.h:195
static SparseTensorStorage< P, C, V > * newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank, const uint64_t *lvlSizes, const LevelType *lvlTypes, const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO< V > *lvlCOO)
Allocates a new sparse tensor and initializes it from the given COO.
Definition Storage.h:590
This file contains the declaration of the mlir::NonFloatComplex type and mlir::Complex type alias.
std::enable_if_t<!is_complex< V >::value, V > readValue(char **linePtr)
Returns an element-value of non-complex type.
Definition File.h:47
PrimaryType
Encoding of the elemental type, for "overloading" @newSparseTensor.
Definition Enums.h:82
Include the generated interface declarations.
This enum defines all the sparse representations supportable by the SparseTensor dialect.
Definition Enums.h:238