// SPDX-License-Identifier: GPL-2.0
/*
* bcachefs journalling code, for btree insertions
*
* Copyright 2012 Google, Inc.
*/
#include "bcachefs.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "trace.h"
/*
 * Human-readable names for the journal error codes, generated from the
 * JOURNAL_ERRORS() x-macro list; indexed by error code, NULL-terminated.
 */
static const char * const bch2_journal_errors[] = {
#define x(n)	#n,
	JOURNAL_ERRORS()
#undef x
	NULL
};
/* Is journal entry @seq not yet persisted to disk? */
static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
{
	u64 ondisk = j->seq_ondisk;

	return seq > ondisk;
}
/*
 * An entry is open when its current offset is below the sentinel
 * JOURNAL_ENTRY_CLOSED_VAL (closed/error states use values at or above it).
 */
static bool __journal_entry_is_open(union journal_res_state state)
{
	unsigned offset = state.cur_entry_offset;

	return offset < JOURNAL_ENTRY_CLOSED_VAL;
}
static inline unsigned nr_unwritten_journal_entries(struct journal *j)
{
return atomic64_read(&j->seq) - j->seq_ondisk;
}
static bool journal_entry_is_open(struct journal *j)
{
return __journal_entry_is_open(j->reservations);
}
/*
 * Print a debug summary of the journal buffer holding entry @seq:
 * refcount, size, expiry time and write-state flags.
 */
static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
{
	union journal_res_state s = READ_ONCE(j->reservations);
	unsigned i = seq & JOURNAL_BUF_MASK;	/* buffers form a power-of-2 ring */
	struct journal_buf *buf = j->buf + i;

	prt_printf(out, "seq:\t%llu\n", seq);
	printbuf_indent_add(out, 2);
	prt_printf(out, "refcount:\t%u\n", journal_state_count(s, i));

	prt_printf(out, "size:\t");
	prt_human_readable_u64(out, vstruct_bytes(buf->data));
	prt_newline(out);

	/* may print a negative value if the buffer has already expired */
	prt_printf(out, "expires:\t");
	prt_printf(out, "%li jiffies\n", buf->expires - jiffies);

	/* write-state flags, printed space-separated on one line */
	prt_printf(out, "flags:\t");
	if (buf->noflush)
		prt_str(out, "noflush ");
	if (buf->must_flush)
		prt_str(out, "must_flush ");
	if (buf->separate_flush)
		prt_str(out, "separate_flush ");
	if (buf->need_flush_to_write_buffer)
		prt_str(out, "need_flush_to_write_buffer ");
	if (buf->write_started)
		prt_str(out, "write_started ");
	if (buf->write_allocated)
		prt_str(out, "write_allocated ");
	if (buf->write_done)
		prt_str(out, "write_done");
	prt_newline(out);
	printbuf_indent_sub(out, 2);
}
/*
 * Print the state of every unwritten journal buffer, from the oldest
 * unwritten entry through the current one, then whether the current
 * entry is still open.
 */
static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
{
	/* align the tab-separated fields printed by bch2_journal_buf_to_text() */
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 24);

	for (u64 seq = journal_last_unwritten_seq(j);
	     seq <= journal_cur_seq(j);
	     seq++)
		bch2_journal_buf_to_text(out, j, seq);
	prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
}
/*
 * Look up the in-memory buffer for journal entry @seq, or NULL if that
 * entry has already been written to disk.
 */
static inline struct journal_buf *
journal_seq_to_buf(struct journal *j, u64 seq)
{
	EBUG_ON(seq > journal_cur_seq(j));

	if (!journal_seq_unwritten(j, seq))
		return NULL;

	struct journal_buf *buf = j->buf + (seq & JOURNAL_BUF_MASK);

	EBUG_ON(le64_to_cpu(buf->data->seq) != seq);
	return buf;
}
/* Initialize a pin list to an empty state with an initial refcount of @count. */
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
	for (unsigned i = 0; i < ARRAY_SIZE(p->list); i++)
		INIT_LIST_HEAD(&p->list[i]);

	INIT_LIST_HEAD(&p->flushed);
	atomic_set(&p->count, count);
	p->devs.nr = 0;
}
/*
* Detect stuck journal conditions and trigger shutdown. Technically the journal
* can end up stuck for a variety of reasons, such as a blocked I/O, journal
* reservation lockup, etc. Since this is a fatal error with potentially
* unpredictable characteristics, we want to be fairly conservative before we
* decide to shut things down.
*
* Consider the journal stuck when it appears full with no ability to commit
* btree transactions, to discard journal buckets, nor acquire priority
* (reserved watermark) reservation.
*/
static inline bool
journal_error_check_stuck(struct journal *j, int error, unsigned flags)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool stuck = false;
struct printbuf buf = PRINTBUF;
if (!(error == JOURNAL_ERR_journal_full ||
error == JOURNAL_ERR_journal_pin_full) ||
nr_unwritten_journal_entries(j) ||
(flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim)
return stuck;
spin_lock(&j->lock);
if (j->can_discard) {
spin_unlock(&j->lock);
return stuck;
}
stuck = true;
/*
* The journal shutdown path will set ->err_seq, but do it here first to
* serialize against concurrent failures and avoid duplicate error
* reports.
*/
if (j->err_seq) {
spin_unlock(&j->lock);
return stuck;
}
j->err_seq = journal_cur_seq(j);
spin_unlock(&j->lock);
bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
bch2_journal_errors[error]);
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "%s", buf.buf);
printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
bch_err(c, "Journal pins:\n%s", buf.buf);
printbuf_exit(&buf);
bch2_fatal_error(c);
dump_stack();
return stuck;
}
/*
 * Start writes for journal buffers that are ready, in sequence order: a
 * buffer may be submitted once all reservations against it have been
 * dropped, and only while the preceding write has at least reached the
 * allocated state.
 *
 * NOTE(review): appears to be called with j->lock held (its caller
 * bch2_journal_buf_put_final() asserts it) — confirm for other call sites.
 */
void bch2_journal_do_writes(struct journal *j)
{
	for (u64 seq = journal_last_unwritten_seq(j);
	     seq <= journal_cur_seq(j);
	     seq++) {
		unsigned idx = seq & JOURNAL_BUF_MASK;
		struct journal_buf *w = j->buf + idx;

		/* previous write started but not yet allocated: can't proceed */
		if (w->write_started && !w->write_allocated)
			break;
		/* already submitted - move on to the next buffer */
		if (w->write_started)
			continue;

		/* no outstanding reservations: this buffer can be written now */
		if (!journal_state_count(j->reservations, idx)) {
			w->write_started = true;
			closure_call(&w->io, bch2_journal_write, j->wq, NULL);
		}

		/* only ever start one new write per call */
		break;
	}
}
/*
 * Final processing when the last reference of a journal buffer has been
 * dropped. Drop the pin list reference acquired at journal entry open and write
 * the buffer, if requested.
 */
void bch2_journal_buf_put_final(struct journal *j, u64 seq)
{
	lockdep_assert_held(&j->lock);

	/* dropping the last pin may advance reclaim state */
	if (__bch2_journal_pin_put(j, seq))
		bch2_journal_reclaim_fast(j);
	bch2_journal_do_writes(j);
}
/*
* Returns true if journal entry is now closed:
*
* We don't close a journal_buf until the next journal_buf is finished writing,
* and can be opened again - this also initializes the next journal_buf:
*/
static void __journal_entry_close(struct journal *j, unsigned closed_val, bool trace)
{
struct bch_fs *c =
|