Project

General

Profile

Statistics
| Branch: | Revision:

gdp-if / tensorflow / gdpfs.h @ master

History | View | Annotate | Download (11.7 KB)

1 13a1f6b0 Eric Allman
/* vim: set ai sw=4 sts=4 ts=4 :*/
2
3
/*
4
** A virtual GDP-based filesystem implementation.
5
**
6
** A file system is anchored by a "meta log", which is the root of
7
** the filesystem and carries the equivalent of all the "inodes" ---
8
** essentially the metadata for the file system.  The first record in
9
** that log has to be a GDPfsMeta entry.
10
**
11
** Directories exist only in the meta-log.  Each directory entry
12
** encodes the operation (GDPfs::ADD to add a new entry or GDPfs::DEL
13
** to delete an entry), the file-system name of the entry, the type
14
** of the entry (GDPfs::DIR for a nested directory or GDPfs::FILE
15
** for a leaf node), and if it is a file, the name of the log containing
16
** the contents of that file.
17
**
18
** Files are stored in separate logs, each of which has a randomized
19
** name.  Files are a sequence of GDPfsFchunk records which are
20
** concatenated to get the contents of the file.
21
**
22
** Disclaimer: I'm doing this description based on my understanding of
23
** how it works based on reading the code.  It may not be completely
24
** accurate, and doesn't match some of the other comments by the
25
** original authors.   - EA 7/2020.
26
*/
27
28 a531ea78 Nitesh Mor
#include <iostream>
29
#include <cstdint>
30
#include <map>
31
#include <vector>
32
#include <string>
33
#include <assert.h>
34 c4c264f3 Nitesh Mor
#include <sys/time.h>
35 a531ea78 Nitesh Mor
36
#ifndef GDP_FS_CAAPI_H_
37
#define GDP_FS_CAAPI_H_
38
39
#include "GDPfs.pb.h"
40
41
extern "C" {
42
        #include <gdp/gdp.h>
43
        #include <ep/ep_dbg.h>
44 ec84b225 Nitesh Mor
        #include <ep/ep_app.h>
45
        #include <ep/ep_crypto.h>
46 a531ea78 Nitesh Mor
}
47
48 418bfd3f Nitesh Mor
// Size limits for log records and writes (presumably in bytes -- TODO
// confirm units).  MAX_WRITESIZE is half of MAX_RECSIZE.
#define MAX_RECSIZE 65536                // Max data in a single record
49 5cee3d9f Eric Allman
#define MAX_WRITESIZE 32768                // Max size of a write (excluding protobuf
50 418bfd3f Nitesh Mor
                                                                //                overhead)
51
52 2813f1d2 Nitesh Mor
// Error codes and such.  This is the status type returned by many of the
// functions declared below.  kSuccess is zero and every failure code is
// negative.  The values -7 through -9 are currently unassigned.
53
typedef enum
54
{
55
        kSuccess = 0,                                // generic success code
56
        kFailure = -1,                                // generic failure code
57
58
        kBadConfig = -2,                        // bad configuration
59
        kInvalidArg = -3,                        // invalid argument
60
61
        kGdpError = -4,                                // generic error by the GDP library
62
        kGdpNameError = -5,                        // GDP library error during name parsing
63
        kGdpErrorCreate = -6,                // GDP library error during creation
64
65
        kProtoParsingError = -10,        // error while parsing a message
66
67
} GdpfsStatus;
68
69 ca58f4e8 Nitesh Mor
// Our version of a stat structure, very similar to tensorflow's
// FileStatistics.
//
// A default-constructed GdpStat has length == -1, mtime_nsec == 0 and
// is_directory == false.  The struct owns no resources, so the default
// constructor is defaulted and no destructor is declared (Rule of Zero);
// this keeps the type trivially copyable and trivially destructible.
struct GdpStat
{
        // Length of the entry; -1 by default (presumably meaning "not yet
        // known", and presumably in bytes -- TODO confirm).
        int64_t length = -1;
        // Modification time; nanoseconds, judging by the name.
        int64_t mtime_nsec = 0;
        // True when the entry is a directory.
        bool is_directory = false;

        GdpStat() = default;

        // Builds a fully populated record.
        GdpStat(int64_t length, int64_t mtime_nsec, bool is_directory)
                        : length(length), mtime_nsec(mtime_nsec),
                                is_directory(is_directory) {}
};
84
85
86 13a1f6b0 Eric Allman
/************************** FUNCTIONS *********************************/
87 2813f1d2 Nitesh Mor
88 418bfd3f Nitesh Mor
// Returns the time since the epoch, in nanoseconds.
89
uint64_t TimeNS();
90
91
// Initializes the library. "mode" provides a general global I/O mode;
92
// individual usage can be more restrictive, e.g. memory-mapped files are
93
// always in RO mode, even if the initialization mode allows writing.
94 a231577b Nitesh Mor
// "debug_setting" overrides any config file settings for debugging
95
// output.  Both arguments are optional: "mode" defaults to GDP_MODE_RO
// and "debug_setting" to NULL.  Returns a GdpfsStatus code.
96
GdpfsStatus GDPfsInit(gdp_iomode_t mode=GDP_MODE_RO,
97
                                                const char* debug_setting=NULL);
98 418bfd3f Nitesh Mor
99
// Creates a null-terminated string of length "len" (including the null)
100
// and stores it at the provided memory location. If "cheat" is true, then
101
// a deterministic string is generated instead of a truly random string,
102
// usually resulting in much faster creation.
103
// Assumes that "s" can hold "len" bytes.
104
void RandData(char *s, const int len, bool cheat=false);
105
106
// Splits the full path (fname) into individual components, which are
107
// stored through "scheme" and "parts".  Uses "://" as the delimiter for
108
// the scheme, and "/" as the delimiter for path components. An example:
109
// "gdp://x/y/z" => "scheme" = "gdp", "parts"={"x", "y", "z"}
110
void SplitPath(const std::string& fname,
111
                                        std::string* scheme,
112
                                        std::vector<std::string> *parts);
113
114
// Takes in a full path (including "gdp://") and stores the
115 8ab66577 Nitesh Mor
// name of the directory log in "topdir" and everything else
116 418bfd3f Nitesh Mor
// in "remaining". As a side effect, performs sanitization of
117
// the path (duplicated '//', etc).
118
// e.g. "gdp://x/y/z" => "topdir" = "x", "remaining" = "y/z"
119 6c1b37e9 Nitesh Mor
// Returns an error status when the path cannot be parsed as a gdp path.
120
GdpfsStatus ParsePath(const std::string& fname,
121 418bfd3f Nitesh Mor
                                        std::string* topdir,
122
                                        std::string* remaining);
123
124 13a1f6b0 Eric Allman
// Splits the `fname` into `dirname` and `basename` (the last part
125 8ab66577 Nitesh Mor
// of a name). The input name may or may not have the protocol
126
// (gdp://) included. However, dirname never includes this
127
// protocol name.
128 13a1f6b0 Eric Allman
// e.g., "gdp://x/y/z" => dirname = "x/y", basename = "z".
129 418bfd3f Nitesh Mor
void BaseDirName(const std::string& fname, std::string* dirname,
130
                                                std::string* basename);
131 41631f5e Nitesh Mor
132 418bfd3f Nitesh Mor
// Returns whether the path is a valid file/directory name.
133
// At the moment, this simply checks for the presence of a '/' in the name.
// NOTE(review): which outcome of that check counts as "valid" is not
// visible from this header -- confirm against the definition.
134
bool NameValid(const std::string& path);
135 a531ea78 Nitesh Mor
136 13a1f6b0 Eric Allman
// Creates a new log and returns its full name in *logname; the opened log
// is handed back through *ginp.  "ginp" is optional and defaults to NULL
// (presumably the handle is then not returned to the caller -- TODO
// confirm against the definition).
137
GdpfsStatus CreateLog(std::string *logname, gdp_gin_t **ginp = NULL);
138 a531ea78 Nitesh Mor
139 f7954a6e Nitesh Mor
// Parsing utility function. Given a string s, parse it as
140
// a GDPfsMsg set with GDPfsFchunk. The offset, length and actual
141
// data (as a string) are stored through "offset", "len" and "d".
// Returns a GdpfsStatus code.
142 2813f1d2 Nitesh Mor
GdpfsStatus ParseMsgFchunk(const std::string& s, size_t *offset,
143 f7954a6e Nitesh Mor
                                                  size_t *len, std::string *d);
144
145 13a1f6b0 Eric Allman
// Initializes a root directory --- essentially mkfs for GDPfs.
146
GdpfsStatus GDPfsMkfs(const std::string& rootdir);
147
148
149
/****************************** CLASSES *******************************/
150
151 05e107f7 Nitesh Mor
// A class for low level file access. This is the base class for all
152
// GDP files/directories. Defines quite a few common utility functions,
153
// not all of which are useful for every scenario.
154 13a1f6b0 Eric Allman
//
155
// This is a bit strange since it is the base class for both files and
156
// directories.  Directories do not have their own GIN (`handle_`), so it's
157
// not clear why that field isn't in the GDPFile class instead of
158
// GDPFileLowLevel.
//
// NOTE(review): the destructor is non-virtual and no copy operations are
// declared, while the class holds raw pointers (handle_, and the
// std::string* values stored in rec_cache_).  If the destructor releases
// them (its definition is not in this header), copying an instance or
// deleting a derived object through a GDPFileLowLevel* would be unsafe
// -- confirm against the implementation.
159 a531ea78 Nitesh Mor
class GDPFileLowLevel
160
{
161
        public:
162 69da5427 Nitesh Mor
163 70e2bd67 Nitesh Mor
        // Initialize a file/dir backed by the given logname in the
164 f7954a6e Nitesh Mor
        // specified mode. Also calls SyncLog to put the state
165
        // variables in some reasonable state.
166 6b980dad Nitesh Mor
        GDPFileLowLevel(const std::string& logname, gdp_iomode_t mode);
167 a531ea78 Nitesh Mor
        ~GDPFileLowLevel();
168
169 41ecf852 Nitesh Mor
        // Check for freshness; updates: maxrecs_, type_, mtime_ns_
170 70e2bd67 Nitesh Mor
        void SyncLog();
171 41ecf852 Nitesh Mor
172 69da5427 Nitesh Mor
        // Get the type (FILE/DIR). Also sync
173
        // with the server if sync==true (the default is no sync).
174 9a538e6d Nitesh Mor
        GDPfs::FileType GetType(bool sync=false);
175 69da5427 Nitesh Mor
176
        // Get the number of records in the log.
177
        // Optionally, sync with the server (the default is no sync).
178 70e2bd67 Nitesh Mor
        gdp_recno_t GetNumRecs(bool sync=false);
179 69da5427 Nitesh Mor
180 4506b093 Nitesh Mor
        // Get the last update time (in nanoseconds). Always syncs.
181
        uint64_t GetMTime();
182 78fd96a8 Nitesh Mor
183 a531ea78 Nitesh Mor
        protected:
184
185 89b1655d Nitesh Mor
        /********************* variables ************************/
186
187 28acbbb5 Nitesh Mor
        // some basic instance variables
188 4506b093 Nitesh Mor
        std::string logname_;        // human readable name for gin/log
189
        gdp_name_t gobname_;        // internal name for gin/log
190
        gdp_iomode_t mode_;                // mode (GDP_MODE_RO/GDP_MODE_RA/etc)
191
        gdp_gin_t* handle_;                // associated gin/log handle
192 a531ea78 Nitesh Mor
193 4506b093 Nitesh Mor
        // some state variables based on our reading of the data;
        // SyncLog() is documented above as the method that updates them
194
        GDPfs::FileType type_ = GDPfs::UNKNOWN_TYPE;        // FILE or DIR
195
        gdp_recno_t maxrecs_ = 0;                                                // num records in log
196
        uint64_t mtime_ns_ = 0;                                                        // last update time
197 625d5ff1 Nitesh Mor
198 41ecf852 Nitesh Mor
        // TODO: we should be using strings in rec_cache_, instead of pointers
199 70e2bd67 Nitesh Mor
        std::map<gdp_recno_t, std::string*> rec_cache_; // cache for records
200 28acbbb5 Nitesh Mor
201 89b1655d Nitesh Mor
        /********************** methods *************************/
202
203 4506b093 Nitesh Mor
        // set the type for this file (FILE or DIR)
204 418bfd3f Nitesh Mor
        void SetType(GDPfs::FileType type);
205 a531ea78 Nitesh Mor
206 4506b093 Nitesh Mor
        // stores the contents of record `recno` in *s and returns the
        // record number
207 418bfd3f Nitesh Mor
        gdp_recno_t ReadRecord(gdp_recno_t recno, std::string *s);
208 4506b093 Nitesh Mor
        // returns the number of records read
        // (presumably `s` points to `numrec` output slots, one per record
        // starting at `startrec` -- TODO confirm against the definition)
209 70e2bd67 Nitesh Mor
        int32_t ReadRecordAsync(gdp_recno_t startrec, int32_t numrec,
210
                                                        std::string **s);
211 4506b093 Nitesh Mor
        // appends `s` as a record; if async=true, returns without waiting
        // for a response.
212 89b1655d Nitesh Mor
        GdpfsStatus AppendRecord(const std::string& s, bool async=false);
213 625d5ff1 Nitesh Mor
214 28acbbb5 Nitesh Mor
        // operating on the record cache (rec_cache_), keyed by record number
215 e15ba88d Nitesh Mor
        GdpfsStatus SetCache(const gdp_recno_t recno, const std::string *s);
216
        GdpfsStatus GetCache(const gdp_recno_t recno, std::string *s);
217 a531ea78 Nitesh Mor
};
218
219 28acbbb5 Nitesh Mor
220 13a1f6b0 Eric Allman
/**********************************************************************/
221 805913c2 Nitesh Mor
// For all intents and purposes, a GDPDir represents a file system
222
// rooted at a specific name.
//
// The entry-manipulation primitives (NewEntry, AddEntry, DelEntry,
// RenameEntry) are private; DeleteFile, DeleteDir and RenameFile are
// thin inline wrappers around DelEntry and RenameEntry.
223 a531ea78 Nitesh Mor
class GDPDir : public GDPFileLowLevel
224
{
225
        public:
226 6b980dad Nitesh Mor
        GDPDir(const std::string& logname, gdp_iomode_t mode);
227 4506b093 Nitesh Mor
228
        // internal state: a map of name=>logname
        // NOTE(review): described as internal, but declared in the public
        // section.
229 ebd8f911 Nitesh Mor
        std::map<std::string, std::string> dentries_;
230 a531ea78 Nitesh Mor
231 4506b093 Nitesh Mor
        // Create a new file+log; stores the logname in *logname. If
232
        // recursive==true, any non-existent paths on the way are created.
        // NOTE(review): `name` is taken by const value, so each call copies
        // the string; switching to a const reference would also require
        // changing the out-of-line definition.
233 418bfd3f Nitesh Mor
        void NewFile(const std::string name, std::string *logname,
234 9a538e6d Nitesh Mor
                                        bool recursive=true);
235 4506b093 Nitesh Mor
236
        // Create a directory. Any non-existent parent directories
237
        // are created if recursive==true.
238 2651f137 Nitesh Mor
        void CreateDir(const std::string name, bool recursive=true);
239 b6438f0c Nitesh Mor
240 4506b093 Nitesh Mor
        // Delete a file; only deletes/unlinks the entry (but not the log)
241 2651f137 Nitesh Mor
        void DeleteFile(std::string name) { DelEntry(name); }
242 4506b093 Nitesh Mor
243 2651f137 Nitesh Mor
        // Same as DeleteFile, but for a directory.
244
        void DeleteDir(std::string name) { DelEntry(name); }
245 a531ea78 Nitesh Mor
246 beec3457 Nitesh Mor
        // Renaming files is much easier than renaming directories;
        // this simply forwards to RenameEntry.
247 69da5427 Nitesh Mor
        void RenameFile(const std::string& oldname,
248
                                const std::string& newname) { RenameEntry(oldname, newname); }
249 beec3457 Nitesh Mor
250 adceff07 Nitesh Mor
        // Stores in *children the list of children for a specific
251
        // sub-directory, or for the current directory (if dirname is empty).
252
        void GetChildren(const std::string& dirname,
253
                                                std::vector<std::string>* children);
254
255 8dce4a9a Nitesh Mor
        // Stores in *results a list of matching path names based on a
256
        // minimally implemented regex. For full details, please see the
257
        // tensorflow requirements.
258
        void GetMatchingPaths(const std::string& pattern,
259
                                                std::vector<std::string>* results);
260
261 4506b093 Nitesh Mor
        // Returns true if the name exists
262 418bfd3f Nitesh Mor
        bool NameExists(const std::string& name);
263 4506b093 Nitesh Mor
264 ca58f4e8 Nitesh Mor
        // Populates the GdpStat structure *stat for the given name
265
        void Stat(const std::string& name, GdpStat* stat);
266
267 4506b093 Nitesh Mor
        // Stores in *logname the corresponding logname for the given entry
268 418bfd3f Nitesh Mor
        void GetEntryLogname(const std::string& name, std::string *logname);
269 4506b093 Nitesh Mor
270
        // Returns the type of the entry (FILE/DIR) for the given name
271 418bfd3f Nitesh Mor
        GDPfs::FileType GetEntryType(const std::string& name);
272 a531ea78 Nitesh Mor
273
        private:
274 8263d948 Nitesh Mor
275 70e2bd67 Nitesh Mor
        // Create a brand new entry; also creates a new log in the process
276 418bfd3f Nitesh Mor
        void NewEntry(std::string name, GDPfs::FileType t,
277 9a538e6d Nitesh Mor
                                                std::string *logname, bool recursive);
278 4506b093 Nitesh Mor
279
        // Add a pre-existing logname under a specific name.
280 418bfd3f Nitesh Mor
        void AddEntry(std::string name, std::string logname, GDPfs::FileType t);
281 4506b093 Nitesh Mor
282
        // Deletes an entry (file/directory)
283 418bfd3f Nitesh Mor
        void DelEntry(std::string name);
284 4506b093 Nitesh Mor
285
        // Renames one entry to another.
286 418bfd3f Nitesh Mor
        void RenameEntry(const std::string& oldname, const std::string& newname);
287 a531ea78 Nitesh Mor
};
288
289 13a1f6b0 Eric Allman
/**********************************************************************/
290 05e107f7 Nitesh Mor
// Common base for the file classes below (GDPFileROMemMap, GDPFileWO):
// a GDPFileLowLevel that additionally tracks a file size.
class GDPFile : public GDPFileLowLevel
291
{
292
        public:
293 6b980dad Nitesh Mor
        GDPFile(const std::string& logname, gdp_iomode_t mode);
294 4506b093 Nitesh Mor
295
        // Returns the size of the file.  "sync" defaults to true
        // (presumably it refreshes from the server first, like the sync
        // flags on GDPFileLowLevel -- TODO confirm).
296 8263d948 Nitesh Mor
        size_t GetFileSize(bool sync=true);
297 05e107f7 Nitesh Mor
298
        protected:
299 8263d948 Nitesh Mor
        // file size; starts at 0 (presumably the value GetFileSize reports
        // -- confirm against the definition)
        size_t filesize_ = 0;
300 05e107f7 Nitesh Mor
301
};
302 a531ea78 Nitesh Mor
303 13a1f6b0 Eric Allman
/**********************************************************************/
304 28acbbb5 Nitesh Mor
// A simple, read-only memory mapped file
305 05e107f7 Nitesh Mor
class GDPFileROMemMap : public GDPFile
306 a531ea78 Nitesh Mor
{
307
        public:
308 6b980dad Nitesh Mor
        GDPFileROMemMap(const std::string& logname, bool async=true);
309 4506b093 Nitesh Mor
310 9a538e6d Nitesh Mor
        const char *data()
311
        {
312 ebd8f911 Nitesh Mor
                return filedata_.c_str();        // initialized in constructor
313 99768081 Nitesh Mor
        }
314 4506b093 Nitesh Mor
315 9a538e6d Nitesh Mor
        uint64_t length()
316
        {
317 ebd8f911 Nitesh Mor
                return filedata_.length();
318 99768081 Nitesh Mor
        }
319 a531ea78 Nitesh Mor
320
        protected:
321 ebd8f911 Nitesh Mor
        std::string filedata_;
322 abd157eb Nitesh Mor
323 4ae394a1 Nitesh Mor
        // Fetch records [startrec..endrec] (i.e. including both
324
        // extremes) and update the filedata_ internally.
325
        void FetchRecords(gdp_recno_t startrec, gdp_recno_t endrec,
326 418bfd3f Nitesh Mor
                                           bool async=true);
327 a531ea78 Nitesh Mor
};
328
329 13a1f6b0 Eric Allman
/**********************************************************************/
330 8263d948 Nitesh Mor
// A read only file, based on the memory mapped file.
331 a531ea78 Nitesh Mor
class GDPFileRO : public GDPFileROMemMap
332
{
333
        public:
334 6b980dad Nitesh Mor
        // Forwards both arguments unchanged to the GDPFileROMemMap
        // constructor.
        // NOTE(review): the constructor is not `explicit`, so a
        // std::string converts implicitly to a GDPFileRO.
        GDPFileRO(const std::string& logname, bool async=true)
335 99768081 Nitesh Mor
                                        : GDPFileROMemMap(logname, async) {}
336 4506b093 Nitesh Mor
337
        // Reads at most 'len' bytes at 'offset', stores them in 'buf'.
338
        // Returns the actual number of bytes read.
339 3c340135 Nitesh Mor
        size_t Read(size_t offset, size_t len, char *buf);
340 a531ea78 Nitesh Mor
};
341
342 13a1f6b0 Eric Allman
/**********************************************************************/
343 28acbbb5 Nitesh Mor
// An append-only file; the name ("WO") is a misnomer.
344 05e107f7 Nitesh Mor
class GDPFileWO : public GDPFile
345 a531ea78 Nitesh Mor
{
346
        public:
347 6b980dad Nitesh Mor
        // Constructs the underlying GDPFile with mode GDP_MODE_AO.
        // NOTE(review): the constructor is not `explicit`, so a
        // std::string converts implicitly to a GDPFileWO.
        GDPFileWO(const std::string& logname)
348 99768081 Nitesh Mor
                                        : GDPFile(logname, GDP_MODE_AO) {}
349 4506b093 Nitesh Mor
350
        // Append an (arbitrarily long) string, by potentially splitting
351
        // it across a number of records.  "async" defaults to true.
352 70b43258 Nitesh Mor
        void Append(const std::string& s, bool async=true);
353 a531ea78 Nitesh Mor
};
354
355 69da5427 Nitesh Mor
#endif