The Sparta Modeling Framework
Loading...
Searching...
No Matches
FastCheckpointer.hpp
1// <FastCheckpointer> -*- C++ -*-
2
3#pragma once
4
5#include <iostream>
6#include <sstream>
7#include <stack>
8#include <queue>
9
11#include "sparta/functional/ArchData.hpp"
14
15#include "sparta/serialization/checkpoint/DeltaCheckpoint.hpp"
16
18#ifndef DEFAULT_SNAPSHOT_THRESH
19#define DEFAULT_SNAPSHOT_THRESH 20
20#endif
21
22namespace sparta::serialization::checkpoint
23{
66 {
67 public:
68
70 //typedef DeltaCheckpoint<storage::StringStreamStorage> checkpoint_type;
71
75
90 FastCheckpointer(TreeNode& root, Scheduler* sched=nullptr) :
91 Checkpointer(root, sched),
92 snap_thresh_(DEFAULT_SNAPSHOT_THRESH),
93 next_chkpt_id_(checkpoint_type::MIN_CHECKPOINT),
94 num_alive_checkpoints_(0),
95 num_alive_snapshots_(0),
96 num_dead_checkpoints_(0)
97 { }
98
// Walks chkpts_ from its last entry to its first and flags every stored
// checkpoint deleted, skipping the ones that are already flagged.
// NOTE(review): the function header for this body is not visible in this
// listing; presumably this is the destructor - confirm against the header.
105 // Reverse iterate and flag all as free
106 for(auto itr = chkpts_.rbegin(); itr != chkpts_.rend(); ++itr){
107 checkpoint_type* d = static_cast<checkpoint_type*>(itr->second.get());
108 if(!d->isFlaggedDeleted()){
109 d->flagDeleted();
110 }
111 }
112 }
113
116
120
133 uint32_t getSnapshotThreshold() const noexcept { return snap_thresh_; }
134
139 void setSnapshotThreshold(uint32_t thresh) noexcept {
140 snap_thresh_ = thresh;
141 }
142
145
149
/*!
 * \brief Deletes a checkpoint by ID
 * \param id ID of the checkpoint to delete
 * \throw CheckpointError if d is null, i.e. no checkpoint was found for
 *        \a id
 *
 * The first time a checkpoint is deleted it is flagged deleted and the
 * counters are adjusted: dead count up, alive count down, and alive
 * snapshot count down if it is a snapshot. Deleting an already-flagged
 * checkpoint leaves the counters alone. In both cases cleanupChain_ is then
 * given the chance to actually free it and its flagged predecessors.
 */
169 void deleteCheckpoint(chkpt_id_t id) override {
170
171 // Flag checkpoint as deleted
// NOTE(review): the statement declaring d is not visible in this listing;
// presumably d is the result of looking up id - confirm.
173 if(!d){
174 throw CheckpointError("Could not delete checkpoint ID=")
175 << id << " because no checkpoint by this ID was found";
176 }
177
178 // Allow deletion and change ID to UNIDENTIFIED_CHECKPOINT.
179 // This is still part of a chain though until there are no
180 // dependencies on it.
181 if(!d->isFlaggedDeleted()){
182 num_dead_checkpoints_++;
183 if(d->isSnapshot()){
184 num_alive_snapshots_--;
185 }
186 num_alive_checkpoints_--;
187 d->flagDeleted();
188 }
189
190 // Delete this and all contiguous previous checkpoints which were
191 // flagged deleted if possible. Stop if current_ is encountered
192 cleanupChain_(d);
193 }
194
/*!
 * \brief Loads state from a specific checkpoint by ID
 * \param id ID of the checkpoint to load
 * \throw CheckpointError if d is null, i.e. no checkpoint was found for
 *        \a id
 *
 * After the load, the loaded checkpoint becomes the current checkpoint and
 * cleanupChain_ is run starting from the checkpoint that was current
 * before the call.
 */
208 void loadCheckpoint(chkpt_id_t id) override {
// NOTE(review): the statement declaring d is not visible in this listing;
// presumably d is the result of looking up id - confirm.
210 if(!d){
211 throw CheckpointError("Could not load checkpoint ID=")
212 << id << " because no checkpoint by this ID was found";
213 }
214
215 d->load(getArchDatas());
216
217 // Move current to another checkpoint. Anything between head and the
218 // old current_ is fair game for removal if allowed
219 checkpoint_type* rmv = static_cast<checkpoint_type*>(getCurrent_());
220 setCurrent_(d);
221
222 // Restore scheduler tick number
// NOTE(review): the statement inside this branch is not visible in this
// listing - confirm what is done with sched_ here.
223 if(sched_){
225 }
226
227 // Remove all checkpoints which can be. Stop if the new current_ is
228 // encountered again.
229 // Note that it is OK if current_ was moved to a later position in
230 // the chain. No important checkpoints will be removed. The
231 // important thing is never to remove current_.
232 cleanupChain_(rmv);
233 }
234
// Returns true when d is non-null and false otherwise.
// NOTE(review): neither the function header nor the statement declaring d
// is visible in this listing; presumably this is checkpointExists(id) and d
// is the result of looking up id - confirm.
239 bool exists = false;
241 if(d){
242 exists = true;
243 }
244 return exists;
245 }
246
255 std::vector<chkpt_id_t> getCheckpointsAt(tick_t t) const override {
256 std::vector<chkpt_id_t> results;
257 for(auto& p : chkpts_){
258 const Checkpoint* cp = p.second.get();
259 const checkpoint_type* dcp = static_cast<const checkpoint_type*>(cp);
260 if(cp->getTick() == t && !dcp->isFlaggedDeleted()){
261 results.push_back(cp->getID());
262 }
263 }
264 return results;
265 }
266
275 std::vector<chkpt_id_t> getCheckpoints() const override {
276 std::vector<chkpt_id_t> results;
277 for(auto& p : chkpts_){
278 const Checkpoint* cp = p.second.get();
279 const checkpoint_type* dcp = static_cast<const checkpoint_type*>(cp);
280 if(!dcp->isFlaggedDeleted()){
281 results.push_back(cp->getID());
282 }
283 }
284 return results;
285 }
286
290 uint32_t getNumCheckpoints() const noexcept override {
291 return num_alive_checkpoints_;
292 }
293
297 uint32_t getNumSnapshots() const noexcept {
298 return num_alive_snapshots_;
299 }
300
/*!
 * \brief Gets the current number of delta checkpoints with valid IDs
 */
304 uint32_t getNumDeltas() const noexcept {
// NOTE(review): the return statement is not visible in this listing -
// confirm how the count is computed.
306 }
307
312 uint32_t getNumDeadCheckpoints() const noexcept {
313 return num_dead_checkpoints_;
314 }
315
331 std::deque<chkpt_id_t> getCheckpointChain(chkpt_id_t id) const override {
332 std::deque<chkpt_id_t> results;
333 if(!getHead()){
334 return results;
335 }
336 const checkpoint_type* d = findCheckpoint_(id);
337 if(!d){
338 throw CheckpointError("There is no checkpoint with ID ") << id;
339 }
340 while(d){
341 results.push_back(d->getID());
342 d = static_cast<checkpoint_type*>(d->getPrev());
343 }
344 return results;
345 }
346
// Finds the latest checkpoint at or before the given tick, starting at the
// 'from' checkpoint and working backward through getPrev() links. Returns
// the first checkpoint whose tick is <= tick, or null if the chain is
// exhausted first. Throws CheckpointError if d is null on entry.
// NOTE(review): the first line of the signature and the statement
// declaring d are not visible in this listing; presumably d is the result
// of looking up 'from' - confirm.
365 chkpt_id_t from) override {
367 if(!d){
368 throw CheckpointError("There is no checkpoint with ID ") << from;
369 }
370
371 // Search backward
372 do{
373 if(d->getTick() <= tick){
374 break;
375 }
376 d = static_cast<checkpoint_type*>(d->getPrev());
377 }while(d);
378
379 return d;
380 }
381
382
390
393
397
401 std::string stringize() const override {
402 std::stringstream ss;
403 ss << "<FastCheckpointer on " << getRoot().getLocation() << '>';
404 return ss.str();
405 }
406
/*!
 * \brief Forwards debug/trace info onto checkpoint by ID
 * \param o Stream to which all trace text is written
 * \param id ID of the checkpoint whose restore chain is traced
 * \param container ArchData being searched; its address is printed
 * \param offset Offset being searched for; printed in hex
 * \param size Size in bytes of the value being searched for
 *
 * Writes a "trace: Searching for ..." line, then either a "not found" line
 * when dcp is null or whatever the checkpoint's own traceValue emits.
 */
410 void traceValue(std::ostream& o, chkpt_id_t id, const ArchData* container, uint32_t offset, uint32_t size) override {
// NOTE(review): the statement declaring dcp is not visible in this listing;
// presumably dcp is the result of looking up id - confirm.
412 o << "trace: Searching for 0x" << std::hex << offset << " (" << std::dec << size
413 << " bytes) in ArchData " << (const void*)container << " when loading checkpoint "
414 << std::dec << id << std::endl;
415 if(!dcp){
416 o << "trace: Checkpoint " << id << " not found" << std::endl;
417 }else{
418 dcp->traceValue(o, getArchDatas(), container, offset, size);
419 }
420 }
421
424
425 protected:
426
440
// Body of the chain clean-up: delete the given checkpoint d and all
// contiguous previous checkpoints which can be deleted.
// NOTE(review): the function header and two interior statements (original
// lines 449 and 488) are not visible in this listing.
//
// Summary of the visible logic:
//  1. Do nothing if d is the head.
//  2. If d is current, or recursForwardFindAlive_(d) reports that something
//     later still needs d: step back to d's predecessor when d is a
//     snapshot, otherwise return.
//  3. Walk backward through getPrev() while the checkpoint is non-null, is
//     not the head, and is flagged deleted. Return as soon as current is
//     reached. Erase from chkpts_ each checkpoint for which canDelete() is
//     true; checkpoints that cannot be deleted yet are stepped over.
441 // In order to truly delete any checkpoints, we must traverse back
442 // to the previous snapshot (or the head) and forward to another
443 // snapshot or the end of the chain.
444 // ONLY if both of those points can be reached without encountering
445 // a living checkpoint or the current checkpoint (forward
446 // only) can the whole chain (including the leading snapshot) be
447 // deleted.
448
450
451 if(d == getHead()){
452 // Cannot delete head of checkpoint tree
453 return;
454 }
455
456 // Walk forward to another snapshot or current
457 const bool needed_later = (getCurrent_() == d) || recursForwardFindAlive_(d);
458 if(needed_later) {
459 // Cannot delete because a later living checkpoint (or current) depends on this
460 if(d->isSnapshot()){
461 // This snapshot is needed later. Move to previous delta and work from there
462 d = static_cast<checkpoint_type*>(d->getPrev());
463 }else{
464 return; // This delta is needed. Therefore all preceding deltas are needed
465 }
466 }
467
468 // Delete backward until current, head, or a non-flagged-deleted checkpoint is hit.
469 // It is possible to fracture the checkpoint tree by deleting a segment
470 // between two snapshots, so prev can end up with nothing leading up to it
471 while(d && d != getHead() && d->isFlaggedDeleted()){
472
473 // If the checkpoint to delete is the current checkpoint, then
474 // we cannot just set current to the previous checkpoint because
475 // we may have run forward and storing a checkpoint in the
476 // future would depend on the checkpoint we are about to delete.
477 // This could be fixed by requiring the next checkpoint to be a
478 // snapshot. Instead, point to the flagged-deleted checkpoint
479 // and do not delete
480 if(getCurrent_() == d){
481 return;
482 }
483
// Read the predecessor now: erasing d from chkpts_ below destroys the
// checkpoint object, after which d must not be dereferenced.
484 checkpoint_type* prev = static_cast<checkpoint_type*>(d->getPrev());
485
486 // If nothing later in the chain (tree) depends on d's data, it can be deleted.
487 // This also patches the checkpoint tree around the deleted checkpoint
489 if(d->canDelete()) {
490 // Get checkpoint id regardless of whether alive or dead
491 chkpt_id_t id = d->getID();
492 if (d->isFlaggedDeleted()) {
493 id = d->getDeletedID();
494 }
495
496 num_dead_checkpoints_--;
497
498 // Erase element in the map
499 auto itr = chkpts_.find(id);
500 sparta_assert(itr != chkpts_.end());
501 chkpts_.erase(itr);
502 }
503
504 d = prev; // Continue until head is reached
505 }
506 }
507
// Look forward from d to see if any future checkpoints depend on d.
// Returns true if current, or a checkpoint which is not flagged deleted,
// is found among the later non-snapshot checkpoints; returns false if
// nothing alive is found.
// NOTE(review): the function header and the condition guarding the
// recursive step (original line 540) are not visible in this listing.
// NOTE(review): the snapshot case returns false from inside the loop, so
// any remaining entries of nexts are not examined once a snapshot child is
// seen - confirm this is intended when d has several next checkpoints.
518 {
519 const std::vector<Checkpoint*> & nexts = d->getNexts();
520 for(const auto & chkpt : nexts)
521 {
522 checkpoint_type* dc = static_cast<checkpoint_type*>(chkpt);
523 // Only check descendants for snapshot-ness
524 if(dc->isSnapshot()){
525 // Found a live snapshot that ends this branch. d is not needed
526 // after this
527 return false;
528 }
529 if(dc == getCurrent_()){
530 // Found current in this search chain
531 return true;
532 }
533 if(dc->isFlaggedDeleted() == false){
534 // Encountered a checkpoint later in the chain that still
535 // depends on this.
536 return true;
537 }
538
539 // Continue the search recursively
541 return true;
542 }
543 }
544
545 // Found nothing alive.
546 return false;
547 }
548
// Looks id up in chkpts_ and returns the stored checkpoint cast to
// checkpoint_type, or nullptr when there is no entry for id.
// NOTE(review): the function header is not visible in this listing;
// presumably this is the non-const findCheckpoint_(id) - confirm.
557 auto itr = chkpts_.find(id);
558 if (itr != chkpts_.end()) {
559 return static_cast<checkpoint_type*>(itr->second.get());
560 }
561 return nullptr;
562 }
563
567 const checkpoint_type* findCheckpoint_(chkpt_id_t id) const noexcept override {
568 auto itr = chkpts_.find(id);
569 if (itr != chkpts_.end()) {
570 return static_cast<checkpoint_type*>(itr->second.get());
571 }
572 return nullptr;
573 }
574
578 void dumpCheckpointNode_(const Checkpoint* chkpt, std::ostream& o) const override {
579 static std::string SNAPSHOT_NOTICE = "(s)";
580
581 // checkpoint_type is a known direct base class of Checkpoint
582 const checkpoint_type* cp = static_cast<const checkpoint_type*>(chkpt);
583
584 // Draw data for this checkpoint
585 if(cp->isFlaggedDeleted()){
586 o << chkpt->getDeletedRepr();
587 }else{
588 o << chkpt->getID();
589 }
590 // Show that this is a snapshot
591 if(cp->isSnapshot()){
592 o << ' ' << SNAPSHOT_NOTICE;
593 }
594 }
595
596 private:
597
601 void createHead_() override {
602 tick_t tick = 0;
603 if(sched_){
604 tick = sched_->getCurrentTick();
605 }
606
607 if(getHead()){
608 throw CheckpointError("Cannot create head at ")
609 << tick << " because a head already exists in this checkpointer";
610 }
611 if(getRoot().isFinalized() == false){
612 CheckpointError exc("Cannot create a checkpoint until the tree is finalized. Attempting to checkpoint from node ");
613 exc << getRoot().getLocation() << " at tick ";
614 if(sched_){
615 exc << tick;
616 }else{
617 exc << "<no scheduler>";
618 }
619 throw exc;
620 }
621
622 checkpoint_type* dcp = new checkpoint_type(getRoot(), getArchDatas(), next_chkpt_id_++, tick, nullptr, true);
623 chkpts_[dcp->getID()].reset(dcp);
624 setHead_(dcp);
625 num_alive_checkpoints_++;
626 num_alive_snapshots_++;
627 setCurrent_(dcp);
628 }
629
630 chkpt_id_t createCheckpoint_(bool force_snapshot=false) override {
631 bool is_snapshot;
632 checkpoint_type* prev;
633
634 if(next_chkpt_id_ == checkpoint_type::UNIDENTIFIED_CHECKPOINT){
635 throw CheckpointError("Exhausted all ")
636 << checkpoint_type::UNIDENTIFIED_CHECKPOINT << " possible checkpoint IDs. "
637 << "This is likely a gross misuse of checkpointing";
638 }
639
640 // Caller guarantees a head
641 sparta_assert(getHead() != nullptr);
642
643 tick_t tick;
644 if(sched_){
645 tick = sched_->getCurrentTick();
646 }else{
647 tick = 0;
648 }
649
650 if(sched_ && (tick < getHead()->getTick())){
651 throw CheckpointError("Cannot create a new checkpoint at tick ")
652 << tick << " because this tick number is smaller than the tick number of the head checkpoint at: "
653 << getHead()->getTick() << ". The head checkpoint cannot be reset once created, so it should be done "
654 << "at the start of simulation before running. The simulator front-end should do this so this must "
655 << "likely be fixed in the simulator.";
656 }
657
658 if(nullptr == getCurrent_()){
659 // Creating a delta from the head
660 prev = static_cast<checkpoint_type*>(getHead_());
661 is_snapshot = false;
662 }else{
663 if(sched_ && (tick < getCurrent_()->getTick())){
664 throw CheckpointError("Current tick number from sparta scheduler (")
665 << tick << " ) is less than the current checkpoint's tick number ("
666 << getCurrent_()->getTick() << " To create a checkpoint with an earlier tick number, an "
667 << "older checkpoint having a tick number <= the tick number specified here must first be "
668 << "loaded";
669 }
670
671 // Find latest checkpoint <= tick
672
673 prev = static_cast<checkpoint_type*>(getCurrent_());
674 is_snapshot = prev->getDistanceToPrevSnapshot() >= getSnapshotThreshold();
675 }
676
677 checkpoint_type* dcp = new checkpoint_type(getRoot(),
678 getArchDatas(), // Created during createHead
679 next_chkpt_id_++,
680 tick,
681 prev,
682 force_snapshot || is_snapshot);
683 chkpts_[dcp->getID()].reset(dcp);
684 num_alive_checkpoints_++;
685 num_alive_snapshots_ += (dcp->isSnapshot() == true);
686 setCurrent_(dcp);
687
688 if (dcp->isSnapshot()){
689 // Clean up starting with this snapshot and moving back.
690 // May have an opportunity to free older deltas right now
691 // (instead of upon next deletion)
692 cleanupChain_(dcp);
693 }
694
695 return dcp->getID();
696 }
697
698
703 uint32_t snap_thresh_;
704
708 chkpt_id_t next_chkpt_id_;
709
714 uint32_t num_alive_checkpoints_;
715
722 uint32_t num_alive_snapshots_;
723
728 uint32_t num_dead_checkpoints_;
729 };
730
731} // namespace sparta::serialization::checkpoint
Set of macros for Sparta assertions. Caught by the framework.
#define sparta_assert(...)
Simple variadic assertion that will throw a sparta_exception if the condition fails.
Exception class for all of Sparta.
Basic Node framework in sparta device tree composite pattern.
Contains a set of contiguous line of architectural data which can be referred to by any architected o...
Definition ArchData.hpp:39
A class that lets you schedule events now and in the future.
Tick getCurrentTick() const noexcept
The current tick the Scheduler is working on or just finished.
void restartAt(Tick t)
Clears the events in the scheduler, sets the current tick to tick and the elapsed ticks to either tic...
Node in a composite tree representing a sparta Tree item.
Definition TreeNode.hpp:205
std::string getLocation() const override final
Indicates that there was an issue operating on checkpoints within the SPARTA framework.
Single checkpoint object interface with a tick number and an ID unique to the owning Checkpointer ins...
Checkpoint * getPrev() const noexcept
Returns the previous checkpoint. If this checkpoint is a snapshot, it has no previous checkpoint.
virtual std::string getDeletedRepr() const
Gets the representation of this deleted checkpoint as part of a checkpoint chain (if that checkpointe...
const std::vector< Checkpoint * > & getNexts() const noexcept
Returns next checkpoint following *this. May be an empty vector if there are no later checkpoints fol...
chkpt_id_t getID() const noexcept
Returns the ID of this checkpoint.
tick_t getTick() const noexcept
Returns the tick number at which this checkpoint was taken.
static const chkpt_id_t UNIDENTIFIED_CHECKPOINT
Indicates unidentified checkpoint (could mean 'invalid' or 'any') depending on context.
Checkpointer interface. Defines an ID-based checkpointing API for tree of related checkpoints which c...
const std::vector< ArchData * > & getArchDatas() const
Returns ArchDatas enumerated by this Checkpointer for iteration when saving or loading checkpoint dat...
Scheduler *const sched_
Scheduler whose tick count will be set and read. Cannot be updated after first checkpoint without ba...
const TreeNode & getRoot() const noexcept
Returns the root associated with this checkpointer.
Checkpoint * getCurrent_() const noexcept
Gets the current checkpointer pointer. Returns nullptr if there is no current checkpoint object.
Checkpoint * getHead_() noexcept
Non-const variant of getHead_.
tick_t getCurrentTick() const
Gets the tick number of the current checkpoint (see getCurrentID). This is the tick number of the lat...
Checkpoint::tick_t tick_t
tick_t Tick type to which checkpoints will refer
std::map< chkpt_id_t, std::unique_ptr< Checkpoint > > chkpts_
All checkpoints sorted by ascending tick number (or equivalently ascending checkpoint ID since both a...
const Checkpoint * getHead() const noexcept
Returns the head checkpoint which is equivalent to the earliest checkpoint taken.
Checkpoint::chkpt_id_t chkpt_id_t
chkpt_id_t Checkpoint ID type by which checkpoints are identified
void setCurrent_(Checkpoint *current)
Sets the current checkpoint pointer.
void setHead_(Checkpoint *head)
Sets the head checkpointer pointer to head for the first time.
Single delta checkpoint object containing all simulator state which changed since some previous Delta...
chkpt_id_t getDeletedID() const noexcept
Returns the ID this checkpoint had before it was deleted. If this checkpoint has not been flagged fo...
bool isSnapshot() const noexcept
Is this checkpoint a snapshot (contains ALL simulator state)
bool isFlaggedDeleted() const noexcept
Indicates whether this checkpoint has been flagged deleted.
virtual void load(const std::vector< ArchData * > &dats) override
Attempts to restore this checkpoint including any previous deltas (dependencies).
bool canDelete() const noexcept
Can this checkpoint be deleted Cannot be deleted if:
void traceValue(std::ostream &o, const std::vector< ArchData * > &dats, const ArchData *container, uint32_t offset, uint32_t size)
Implement trace of a value across the restore chain as described in Checkpointer::traceValue.
void flagDeleted()
Allows this checkpoint to be deleted if it is no longer a previous delta of some other delta (i....
Implements quick checkpointing through delta-checkpoint trees which store state-deltas in a compact f...
FastCheckpointer(TreeNode &root, Scheduler *sched=nullptr)
FastCheckpointer Constructor.
void deleteCheckpoint(chkpt_id_t id) override
Deletes a checkpoint by ID.
void setSnapshotThreshold(uint32_t thresh) noexcept
Sets the snapshot threshold.
uint32_t getNumCheckpoints() const noexcept override
Gets the current number of checkpoints having valid IDs.
std::string stringize() const override
Returns a string describing this object.
void cleanupChain_(checkpoint_type *d)
Delete given checkpoint and all contiguous previous checkpoints which can be deleted (See checkpoint_...
checkpoint_type * findLatestCheckpointAtOrBefore(tick_t tick, chkpt_id_t from) override
Finds the latest checkpoint at or before the given tick starting at the from checkpoint and working b...
uint32_t getSnapshotThreshold() const noexcept
Returns the next-snapshot threshold.
void dumpCheckpointNode_(const Checkpoint *chkpt, std::ostream &o) const override
Implements Checkpointer::dumpCheckpointNode_.
std::vector< chkpt_id_t > getCheckpointsAt(tick_t t) const override
Gets all checkpoints taken at tick t on any timeline.
std::deque< chkpt_id_t > getCheckpointChain(chkpt_id_t id) const override
Debugging utility which gets a deque of checkpoints representing a chain starting at the checkpoint h...
const checkpoint_type * findCheckpoint_(chkpt_id_t id) const noexcept override
const variant of findCheckpoint_
uint32_t getNumDeltas() const noexcept
Gets the current number of delta checkpoints with valid IDs.
void loadCheckpoint(chkpt_id_t id) override
Loads state from a specific checkpoint by ID.
checkpoint_type * findCheckpoint_(chkpt_id_t id) noexcept override
Attempts to find a checkpoint within this checkpointer by ID.
uint32_t getNumSnapshots() const noexcept
Gets the current number of snapshots with valid IDs.
uint32_t getNumDeadCheckpoints() const noexcept
Gets the current number of checkpoints (delta or snapshot) without valid IDs.
bool recursForwardFindAlive_(checkpoint_type *d) const
Look forward to see if any future checkpoints depend on d.
std::vector< chkpt_id_t > getCheckpoints() const override
Gets all checkpoint IDs available on any timeline sorted by tick (or equivalently checkpoint ID).
checkpoint_type * findInternalCheckpoint(chkpt_id_t id)
Gets a checkpoint through findCheckpoint interface casted to the type of Checkpoint subclass used by ...
bool checkpointExists(chkpt_id_t id)
Queries a specific checkpoint by ID.
void traceValue(std::ostream &o, chkpt_id_t id, const ArchData *container, uint32_t offset, uint32_t size) override
Forwards debug/trace info onto checkpoint by ID.