The Sparta Modeling Framework
Loading...
Searching...
No Matches
FastCheckpointer.hpp
1// <FastCheckpointer> -*- C++ -*-
2
3#pragma once
4
5#include <iostream>
6#include <sstream>
7#include <stack>
8#include <queue>
9
11#include "sparta/functional/ArchData.hpp"
14
15#include "sparta/serialization/checkpoint/DeltaCheckpoint.hpp"
16
18#ifndef DEFAULT_SNAPSHOT_THRESH
19#define DEFAULT_SNAPSHOT_THRESH 20
20#endif
21
22namespace sparta::serialization::checkpoint
23{
66 {
67 public:
68
70 //typedef DeltaCheckpoint<storage::StringStreamStorage> checkpoint_type;
71
75
// Constructs a FastCheckpointer for the tree rooted at root.
// root and sched are forwarded unchanged to the Checkpointer base class;
// sched may be null. The snapshot threshold starts at
// DEFAULT_SNAPSHOT_THRESH, the next checkpoint ID starts at
// checkpoint_type::MIN_CHECKPOINT, and every alive/dead counter starts at 0.
90 FastCheckpointer(TreeNode& root, Scheduler* sched=nullptr) :
91 Checkpointer(root, sched),
92 snap_thresh_(DEFAULT_SNAPSHOT_THRESH),
93 next_chkpt_id_(checkpoint_type::MIN_CHECKPOINT),
94 num_alive_checkpoints_(0),
95 num_alive_snapshots_(0),
96 num_dead_checkpoints_(0)
97 { }
98
// Constructs a FastCheckpointer for several tree roots at once.
// roots and sched are forwarded unchanged to the Checkpointer base class;
// sched may be null. All other state is initialized exactly as in the
// single-root constructor: default snapshot threshold, next ID set to
// checkpoint_type::MIN_CHECKPOINT, and all counters at 0.
113 FastCheckpointer(const std::vector<sparta::TreeNode*>& roots, Scheduler* sched=nullptr) :
114 Checkpointer(roots, sched),
115 snap_thresh_(DEFAULT_SNAPSHOT_THRESH),
116 next_chkpt_id_(checkpoint_type::MIN_CHECKPOINT),
117 num_alive_checkpoints_(0),
118 num_alive_snapshots_(0),
119 num_dead_checkpoints_(0)
120 { }
121
128 // Reverse iterate and flag all as free
129 for(auto itr = chkpts_.rbegin(); itr != chkpts_.rend(); ++itr){
130 checkpoint_type* d = static_cast<checkpoint_type*>(itr->second.get());
131 if(!d->isFlaggedDeleted()){
132 d->flagDeleted();
133 }
134 }
135 }
136
139
143
156 uint32_t getSnapshotThreshold() const noexcept { return snap_thresh_; }
157
162 void setSnapshotThreshold(uint32_t thresh) noexcept {
163 snap_thresh_ = thresh;
164 }
165
172 uint64_t getTotalMemoryUse() const noexcept override {
173 uint64_t mem = 0;
174 for(auto& cp : chkpts_){
175 mem += cp.second->getTotalMemoryUse();
176 }
177 return mem;
178 }
179
184 uint64_t getContentMemoryUse() const noexcept override {
185 uint64_t mem = 0;
186 for(auto& cp : chkpts_){
187 mem += cp.second->getContentMemoryUse();
188 }
189 return mem;
190 }
191
194
198
218 void deleteCheckpoint(chkpt_id_t id) override {
219
220 // Flag checkpoint as deleted
222 if(!d){
223 throw CheckpointError("Could not delete checkpoint ID=")
224 << id << " because no checkpoint by this ID was found";
225 }
226
227 // Allow deletion and change ID to UNIDENTIFIED_CHECKPOINT.
228 // This is still part of a chain though until there are no
229 // dependencies on it.
230 if(!d->isFlaggedDeleted()){
231 num_dead_checkpoints_++;
232 if(d->isSnapshot()){
233 num_alive_snapshots_--;
234 }
235 num_alive_checkpoints_--;
236 d->flagDeleted();
237 }
238
239 // Delete this and all contiguous previous checkpoints which were
240 // flagged deleted if possible. Stop if current_ is encountered
241 cleanupChain_(d);
242 }
243
257 void loadCheckpoint(chkpt_id_t id) override {
259 if(!d){
260 throw CheckpointError("Could not load checkpoint ID=")
261 << id << " because no checkpoint by this ID was found";
262 }
263
264 d->load(getArchDatas());
265
266 // Move current to another checkpoint. Anything between head and the
267 // old current_ is fair game for removal if allowed
268 checkpoint_type* rmv = static_cast<checkpoint_type*>(getCurrent_());
269 setCurrent_(d);
270
271 // Restore scheduler tick number
272 if(sched_){
274 }
275
276 // Remove all checkpoints which can be. Stop if the new current_ is
277 // encountered again.
278 // Note that it is OK if current_ was moved to a later position in
279 // the chain. No important checkpoints will be removed. The
280 // important thing is never to remove current_.
281 cleanupChain_(rmv);
282 }
283
292 std::vector<chkpt_id_t> getCheckpointsAt(tick_t t) override {
293 std::vector<chkpt_id_t> results;
294 for(auto& p : chkpts_){
295 const Checkpoint* cp = p.second.get();
296 const checkpoint_type* dcp = static_cast<const checkpoint_type*>(cp);
297 if(cp->getTick() == t && !dcp->isFlaggedDeleted()){
298 results.push_back(cp->getID());
299 }
300 }
301 return results;
302 }
303
312 std::vector<chkpt_id_t> getCheckpoints() override {
313 std::vector<chkpt_id_t> results;
314 for(auto& p : chkpts_){
315 const Checkpoint* cp = p.second.get();
316 const checkpoint_type* dcp = static_cast<const checkpoint_type*>(cp);
317 if(!dcp->isFlaggedDeleted()){
318 results.push_back(cp->getID());
319 }
320 }
321 return results;
322 }
323
327 uint32_t getNumCheckpoints() const noexcept override {
328 return num_alive_checkpoints_;
329 }
330
334 uint32_t getNumSnapshots() const noexcept {
335 return num_alive_snapshots_;
336 }
337
341 uint32_t getNumDeltas() const noexcept {
343 }
344
349 uint32_t getNumDeadCheckpoints() const noexcept {
350 return num_dead_checkpoints_;
351 }
352
368 std::deque<chkpt_id_t> getCheckpointChain(chkpt_id_t id) override {
369 std::deque<chkpt_id_t> results;
370 if(!getHead()){
371 return results;
372 }
373 const checkpoint_type* d = findCheckpoint_(id);
374 if(!d){
375 throw CheckpointError("There is no checkpoint with ID ") << id;
376 }
377 while(d){
378 results.push_back(d->getID());
379 d = static_cast<checkpoint_type*>(d->getPrev());
380 }
381 return results;
382 }
383
402 chkpt_id_t from) {
404 if(!d){
405 throw CheckpointError("There is no checkpoint with ID ") << from;
406 }
407
408 // Search backward
409 do{
410 if(d->getTick() <= tick){
411 break;
412 }
413 d = static_cast<checkpoint_type*>(d->getPrev());
414 }while(d);
415
416 return d;
417 }
418
426 auto it = chkpts_.find(id);
427 if (it != chkpts_.end()) {
428 return static_cast<checkpoint_type*>(it->second.get());
429 }
430 return nullptr;
431 }
432
440 bool hasCheckpoint(chkpt_id_t id) noexcept override {
441 return chkpts_.find(id) != chkpts_.end();
442 }
443
447 std::vector<chkpt_id_t> getNextIDs(chkpt_id_t id) override final {
448 std::vector<chkpt_id_t> next_ids;
449 if (const auto chkpt = findCheckpoint_(id)) {
450 for (const auto next : chkpt->getNexts()) {
451 const auto dcp = static_cast<checkpoint_type*>(next);
452 if (!dcp->isFlaggedDeleted()) {
453 next_ids.push_back(next->getID());
454 }
455 }
456 }
457 return next_ids;
458 }
459
462
466
470 std::string stringize() const override {
471 std::stringstream ss;
472 ss << "<FastCheckpointer on ";
473 for (size_t i = 0; i < getRoots().size(); ++i) {
474 TreeNode* root = getRoots()[i];
475 if (i != 0) {
476 ss << ", ";
477 }
478 ss << root->getLocation();
479 }
480 ss << '>';
481 return ss.str();
482 }
483
489 void dumpList(std::ostream& o) override {
490 for(auto& cp : chkpts_){
491 o << cp.second->stringize() << std::endl;
492 }
493 }
494
500 void dumpData(std::ostream& o) override {
501 for(auto& cp : chkpts_){
502 cp.second->dumpData(o);
503 o << std::endl;
504 }
505 }
506
513 void dumpAnnotatedData(std::ostream& o) override {
514 for(auto& cp : chkpts_){
515 o << cp.second->stringize() << std::endl;
516 cp.second->dumpData(o);
517 o << std::endl;
518 }
519 }
520
524 void traceValue(std::ostream& o, chkpt_id_t id, const ArchData* container, uint32_t offset, uint32_t size) override {
526 o << "trace: Searching for 0x" << std::hex << offset << " (" << std::dec << size
527 << " bytes) in ArchData " << (const void*)container << " when loading checkpoint "
528 << std::dec << id << std::endl;
529 if(!dcp){
530 o << "trace: Checkpoint " << id << " not found" << std::endl;
531 }else{
532 dcp->traceValue(o, getArchDatas(), container, offset, size);
533 }
534 }
535
538
539 protected:
540
554
555 // In order to truly delete any checkpoints, we must traverse back
556 // to the previous snapshot (or the head) and forward to another
557 // snapshot or the end of the chain.
558 // ONLY if both of those points can be reached without encountering
559 // a living checkpoint or the current checkpoint (forward
560 // only) can the whole chain (including the leading snapshot) be
561 // deleted.
562
564
565 if(d == getHead()){
566 // Cannot delete head of checkpoint tree
567 return;
568 }
569
570 // Walk forward to another snapshot or current
571 const bool needed_later = (getCurrent_() == d) || recursForwardFindAlive_(d);
572 if(needed_later) {
573 // Cannot delete because a later living checkpoint (or current) depends on this
574 if(d->isSnapshot()){
575 // This snapshot is needed later. Move to previous delta and work from there
576 d = static_cast<checkpoint_type*>(d->getPrev());
577 }else{
578 return; // This delta is needed. Therefore all preceding deltas are needed
579 }
580 }
581
582 // Delete backward until current, head, or a non-flagged-deleted checkpoint is hit.
583 // It is possible to fracture the checkpoint tree by deleting a segment
584 // between two snapshots, so prev can end up with nothing leading up to it
585 while(d && d != getHead() && d->isFlaggedDeleted()){
586
587 // If the checkpoint to delete is the current checkpoint, then
588 // We cannot just set current to the previous checkpoint because
589 // we may have run forward and storing a checkpoint in the
590 // future would depend on the checkpoint we are about to delete.
591 // This could be fixed by requiring the next checkpoint to be a
592 // snapshot. Instead, point to the flagged-deleted checkpoint
593 // and do not delete
594 if(getCurrent_() == d){
595 return;
596 }
597
598 checkpoint_type* prev = static_cast<checkpoint_type*>(d->getPrev());
599
600 // If nothing later in the chain (tree) depends on d's data, it can be deleted.
601 // This also patches the checkpoint tree around the deleted checkpoint
603 if(d->canDelete()) {
604 // Get checkpoint id regardless of whether alive or dead
605 chkpt_id_t id = d->getID();
606 if (d->isFlaggedDeleted()) {
607 id = d->getDeletedID();
608 }
609
610 num_dead_checkpoints_--;
611
612 // Erase element in the map
613 auto itr = chkpts_.find(id);
614 sparta_assert(itr != chkpts_.end());
615 chkpts_.erase(itr);
616 }
617
618 d = prev; // Continue until head is reached
619 }
620 }
621
632 {
633 const std::vector<Checkpoint*> & nexts = d->getNexts();
634 for(const auto & chkpt : nexts)
635 {
636 checkpoint_type* dc = static_cast<checkpoint_type*>(chkpt);
637 // Only check descendants for snapshot-ness
638 if(dc->isSnapshot()){
639 // Found a live snapshot that ends this branch. d is not needed
640 // after this
641 return false;
642 }
643 if(dc == getCurrent_()){
644 // Found current in this search chain
645 return true;
646 }
647 if(dc->isFlaggedDeleted() == false){
648 // Encountered a checkpoint later in the chain that still
649 // depends on this.
650 return true;
651 }
652
653 // Continue the search recursively
655 return true;
656 }
657 }
658
659 // Found nothing alive.
660 return false;
661 }
662
671 auto itr = chkpts_.find(id);
672 if (itr != chkpts_.end()) {
673 return static_cast<checkpoint_type*>(itr->second.get());
674 }
675 return nullptr;
676 }
677
681 const checkpoint_type* findCheckpoint_(chkpt_id_t id) const noexcept {
682 auto itr = chkpts_.find(id);
683 if (itr != chkpts_.end()) {
684 return static_cast<checkpoint_type*>(itr->second.get());
685 }
686 return nullptr;
687 }
688
692 void dumpCheckpointNode_(const chkpt_id_t id, std::ostream& o) override {
693 static std::string SNAPSHOT_NOTICE = "(s)";
694 auto cp = findCheckpoint_(id);
695
696 // Draw data for this checkpoint
697 if(cp->isFlaggedDeleted()){
698 o << cp->getDeletedRepr();
699 }else{
700 o << cp->getID();
701 }
702 // Show that this is a snapshot
703 if(cp->isSnapshot()){
704 o << ' ' << SNAPSHOT_NOTICE;
705 }
706 }
707
708 private:
709
713 void createHead_() override {
714 tick_t tick = 0;
715 if(sched_){
716 tick = sched_->getCurrentTick();
717 }
718
719 if(getHead()){
720 throw CheckpointError("Cannot create head at ")
721 << tick << " because a head already exists in this checkpointer";
722 }
723 for (auto root : getRoots()) {
724 if(root->isFinalized() == false){
725 CheckpointError exc("Cannot create a checkpoint until the tree is finalized. Attempting to checkpoint from node ");
726 exc << root->getLocation() << " at tick ";
727 if(sched_){
728 exc << tick;
729 }else{
730 exc << "<no scheduler>";
731 }
732 throw exc;
733 }
734 }
735
736 checkpoint_type* dcp = new checkpoint_type(getArchDatas(), next_chkpt_id_++, tick, nullptr, true);
737 chkpts_[dcp->getID()].reset(dcp);
738 setHead_(dcp);
739 num_alive_checkpoints_++;
740 num_alive_snapshots_++;
741 setCurrent_(dcp);
742 }
743
744 chkpt_id_t createCheckpoint_(bool force_snapshot=false) override {
745 bool is_snapshot;
746 checkpoint_type* prev;
747
748 if(next_chkpt_id_ == checkpoint_type::UNIDENTIFIED_CHECKPOINT){
749 throw CheckpointError("Exhausted all ")
750 << checkpoint_type::UNIDENTIFIED_CHECKPOINT << " possible checkpoint IDs. "
751 << "This is likely a gross misuse of checkpointing";
752 }
753
754 // Caller guarantees a head
755 sparta_assert(getHead() != nullptr);
756
757 tick_t tick;
758 if(sched_){
759 tick = sched_->getCurrentTick();
760 }else{
761 tick = 0;
762 }
763
764 if(sched_ && (tick < getHead()->getTick())){
765 throw CheckpointError("Cannot create a new checkpoint at tick ")
766 << tick << " because this tick number is smaller than the tick number of the head checkpoint at: "
767 << getHead()->getTick() << ". The head checkpoint cannot be reset once created, so it should be done "
768 << "at the start of simulation before running. The simulator front-end should do this so this must "
769 << "likely be fixed in the simulator.";
770 }
771
772 if(nullptr == getCurrent_()){
773 // Creating a delta from the head
774 prev = static_cast<checkpoint_type*>(getHead_());
775 is_snapshot = false;
776 }else{
777 if(sched_ && (tick < getCurrent_()->getTick())){
778 throw CheckpointError("Current tick number from sparta scheduler (")
779 << tick << " ) is less than the current checkpoint's tick number ("
780 << getCurrent_()->getTick() << " To create a checkpoint with an earlier tick number, an "
781 << "older checkpoint having a tick number <= the tick number specified here must first be "
782 << "loaded";
783 }
784
785 // Find latest checkpoint <= tick
786
787 prev = static_cast<checkpoint_type*>(getCurrent_());
788 is_snapshot = prev->getDistanceToPrevSnapshot() >= getSnapshotThreshold();
789 }
790
791 checkpoint_type* dcp = new checkpoint_type(getArchDatas(), // Created during createHead
792 next_chkpt_id_++,
793 tick,
794 prev,
795 force_snapshot || is_snapshot);
796 chkpts_[dcp->getID()].reset(dcp);
797 num_alive_checkpoints_++;
798 num_alive_snapshots_ += (dcp->isSnapshot() == true);
799 setCurrent_(dcp);
800
801 if (dcp->isSnapshot()){
802 // Clean up starting with this snapshot and moving back.
803 // May have an opportunity to free older deltas right now
804 // (instead of upon next deletion)
805 cleanupChain_(dcp);
806 }
807
808 return dcp->getID();
809 }
810
819 std::map<chkpt_id_t, std::unique_ptr<Checkpoint>> chkpts_;
820
825 uint32_t snap_thresh_;
826
830 chkpt_id_t next_chkpt_id_;
831
836 uint32_t num_alive_checkpoints_;
837
844 uint32_t num_alive_snapshots_;
845
850 uint32_t num_dead_checkpoints_;
851 };
852
853} // namespace sparta::serialization::checkpoint
Set of macros for Sparta assertions. Caught by the framework.
#define sparta_assert(...)
Simple variadic assertion that will throw a sparta_exception if the condition fails.
Exception class for all of Sparta.
Basic Node framework in sparta device tree composite pattern.
Contains a set of contiguous line of architectural data which can be referred to by any architected o...
Definition ArchData.hpp:39
A class that lets you schedule events now and in the future.
Tick getCurrentTick() const noexcept
The current tick the Scheduler is working on or just finished.
void restartAt(Tick t)
Clears the events in the scheduler, sets the current tick to tick and the elapsed ticks to either tic...
Node in a composite tree representing a sparta Tree item.
Definition TreeNode.hpp:205
std::string getLocation() const override final
chkpt_id_t getID() const noexcept
Returns the ID of this checkpoint.
tick_t getTick() const noexcept
Returns the tick number at which this checkpoint was taken.
static const chkpt_id_t UNIDENTIFIED_CHECKPOINT
Indicates unidentified checkpoint (could mean 'invalid' or 'any') depending on context.
Indicates that there was an issue operating on checkpoints within the SPARTA framework.
Single checkpoint object interface with a tick number and an ID unique to the owning Checkpointer ins...
Checkpoint * getPrev() const noexcept
Returns the previous checkpoint. If this checkpoint is a snapshot, it has no previous checkpoint.
const std::vector< Checkpoint * > & getNexts() const noexcept
Returns next checkpoint following *this. May be an empty vector if there are no later checkpoints fol...
Checkpointer interface. Defines an ID-based checkpointing API for tree of related checkpoints which c...
const std::vector< TreeNode * > & getRoots() const noexcept
Returns the root(s) associated with this checkpointer.
Checkpoint::chkpt_id_t chkpt_id_t
tick_t Tick type to which checkpoints will refer
const std::vector< ArchData * > & getArchDatas() const
Returns ArchDatas enumerated by this Checkpointer for iteration when saving or loading checkpoint dat...
void setCurrent_(CheckpointBase *current)
Sets the current checkpoint pointer.
Scheduler *const sched_
Scheduler whose tick count will be set and read. Cannot be updated after first checkpoint without ba...
CheckpointBase * getHead_() noexcept
Non-const variant of getHead_.
const CheckpointBase * getHead() const noexcept
Returns the head checkpoint which is equivalent to the earliest checkpoint taken.
void setHead_(CheckpointBase *head)
Sets the head checkpointer pointer to head for the first time.
Checkpoint::tick_t tick_t
tick_t Tick type to which checkpoints will refer
tick_t getCurrentTick() const
Gets the tick number of the current checkpoint (see getCurrentID). This is the tick number of the lat...
CheckpointBase * getCurrent_() const noexcept
Gets the current checkpointer pointer. Returns nullptr if there is no current checkpoint object.
Single delta checkpoint object containing all simulator state which changed since some previous Delta...
chkpt_id_t getDeletedID() const noexcept
Return the ID had by this checkpoint before it was deleted If this checkpoint has not been flagged fo...
bool isSnapshot() const noexcept
Is this checkpoint a snapshot (contains ALL simulator state)
bool isFlaggedDeleted() const noexcept
Indicates whether this checkpoint has been flagged deleted.
virtual void load(const std::vector< ArchData * > &dats) override
Attempts to restore this checkpoint including any previous deltas (dependencies).
bool canDelete() const noexcept
Can this checkpoint be deleted Cannot be deleted if:
void traceValue(std::ostream &o, const std::vector< ArchData * > &dats, const ArchData *container, uint32_t offset, uint32_t size)
Implement trace of a value across the restore chain as described in Checkpointer::traceValue.
void flagDeleted()
Allows this checkpoint to be deleted if it is no longer a previous delta of some other delta (i....
Implements quick checkpointing through delta-checkpoint trees which store state-deltas in a compact f...
std::vector< chkpt_id_t > getNextIDs(chkpt_id_t id) override final
Returns IDs of the checkpoints immediately following the given checkpoint.
const checkpoint_type * findCheckpoint(chkpt_id_t id) noexcept
Finds a checkpoint by its ID.
FastCheckpointer(TreeNode &root, Scheduler *sched=nullptr)
FastCheckpointer Constructor.
void deleteCheckpoint(chkpt_id_t id) override
Deletes a checkpoint by ID.
uint64_t getTotalMemoryUse() const noexcept override
Computes and returns the memory usage by this checkpointer at this moment including any framework ove...
void setSnapshotThreshold(uint32_t thresh) noexcept
Sets the snapshot threshold.
uint32_t getNumCheckpoints() const noexcept override
Gets the current number of checkpoints having valid IDs.
std::string stringize() const override
Returns a string describing this object.
void cleanupChain_(checkpoint_type *d)
Delete given checkpoint and all contiguous previous checkpoints which can be deleted (See checkpoint_...
std::deque< chkpt_id_t > getCheckpointChain(chkpt_id_t id) override
Debugging utility which gets a deque of checkpoints representing a chain starting at the checkpoint h...
checkpoint_type * findCheckpoint_(chkpt_id_t id) noexcept
Attempts to find a checkpoint within this checkpointer by ID.
uint32_t getSnapshotThreshold() const noexcept
Returns the next-snapshot threshold.
checkpoint_type * findLatestCheckpointAtOrBefore(tick_t tick, chkpt_id_t from)
Finds the latest checkpoint at or before the given tick starting at the from checkpoint and working b...
uint64_t getContentMemoryUse() const noexcept override
Computes and returns the memory usage by this checkpointer at this moment purely for the checkpoint s...
std::vector< chkpt_id_t > getCheckpoints() override
Gets all checkpoint IDs available on any timeline sorted by tick (or equivalently checkpoint ID).
void dumpAnnotatedData(std::ostream &o) override
Dumps this checkpointer's data to an ostream with annotations between each ArchData and a newline fol...
bool hasCheckpoint(chkpt_id_t id) noexcept override
Tests whether this checkpoint manager has a checkpoint with the given id.
const checkpoint_type * findCheckpoint_(chkpt_id_t id) const noexcept
const variant of findCheckpoint_
FastCheckpointer(const std::vector< sparta::TreeNode * > &roots, Scheduler *sched=nullptr)
FastCheckpointer Constructor.
uint32_t getNumDeltas() const noexcept
Gets the current number of delta checkpoints with valid IDs.
void loadCheckpoint(chkpt_id_t id) override
Loads state from a specific checkpoint by ID.
std::vector< chkpt_id_t > getCheckpointsAt(tick_t t) override
Gets all checkpoints taken at tick t on any timeline.
uint32_t getNumSnapshots() const noexcept
Gets the current number of snapshots with valid IDs.
uint32_t getNumDeadCheckpoints() const noexcept
Gets the current number of checkpoints (delta or snapshot) withOUT valid IDs.
bool recursForwardFindAlive_(checkpoint_type *d) const
Look forward to see if any future checkpoints depend on d.
void dumpList(std::ostream &o) override
Dumps this checkpointer's flat list of checkpoints to an ostream with a newline following each checkp...
void dumpData(std::ostream &o) override
Dumps this checkpointer's data to an ostream with a newline following each checkpoint.
void dumpCheckpointNode_(const chkpt_id_t id, std::ostream &o) override
Implements Checkpointer::dumpCheckpointNode_.
void traceValue(std::ostream &o, chkpt_id_t id, const ArchData *container, uint32_t offset, uint32_t size) override
Forwards debug/trace info onto checkpoint by ID.