// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
#include "btree_write_buffer.h"
#include "buckets.h"
#include "errcode.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "replicas.h"
#include "sb-members.h"
#include "trace.h"
#include <linux/kthread.h>
#include <linux/sched/mm.h>
/* Free space calculations: */
/*
 * Translate a journal_space_from selector into the matching bucket index
 * stored in @ja:
 *
 *   journal_space_discarded    -> ja->discard_idx
 *   journal_space_clean_ondisk -> ja->dirty_idx_ondisk
 *   journal_space_clean        -> ja->dirty_idx
 *
 * Any other selector is a programming error and hits BUG().
 */
static unsigned journal_space_from(struct journal_device *ja,
				   enum journal_space_from from)
{
	if (from == journal_space_discarded)
		return ja->discard_idx;

	if (from == journal_space_clean_ondisk)
		return ja->dirty_idx_ondisk;

	if (from == journal_space_clean)
		return ja->dirty_idx;

	BUG();
}
/*
 * Number of journal buckets on @ja that lie between the bucket currently
 * being written (ja->cur_idx) and the index selected by @from, with the
 * bucket array treated as a ring of ja->nr entries.
 *
 * @j is not referenced by this function.
 *
 * Returns 0 when the device has no journal buckets.
 */
unsigned bch2_journal_dev_buckets_available(struct journal *j,
					    struct journal_device *ja,
					    enum journal_space_from from)
{
	unsigned limit, nr_free;

	/* Also keeps the modulo below from dividing by zero */
	if (!ja->nr)
		return 0;

	limit	= journal_space_from(ja, from);

	/* Ring distance from cur_idx to limit, excluding cur_idx itself */
	nr_free	= (limit - ja->cur_idx - 1 + ja->nr) % ja->nr;

	if (!nr_free || ja->dirty_idx_ondisk != ja->dirty_idx)
		return nr_free;

	/*
	 * Hold the last bucket in reserve unless writing the new last_seq
	 * will make another bucket available:
	 */
	return nr_free - 1;
}
void bch2_journal_set_watermark(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool low_on_space = j->space[journal_space_clean].total * 4 <=
j->space[journal_space_total].total;
bool low_on_pin = fifo_free(&j->pin) < j->pin.size / 4;
bool low_on_wb = bch2_btree_write_buffer_must_wait(c);
unsigned watermark = low_on_space || low_on_pin || low_on_wb
? BCH_WATERMARK_reclaim
: BCH_WATERMARK_stripe;
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], low_on_space) ||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], low_on_pin) ||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
mod_bit(JOURNAL_space_low, &j->flags, low_on_space || low_on_pin);
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
}
static struct journal_space
journal_dev_space_available(struct journal *j, struct bch_dev *ca,
enum journal_space_from from)
{
struct journal_device *ja = &ca->journal;
unsigned sectors, buckets, unwritten;
u64 seq;
if (from == journal_space_total)
return (struct journal_space) {
.next_entry = ca->mi.bucket_size,
.total = ca->mi.bucket_size * ja->nr,
};
buckets = bch2_journal_dev_buckets_available(j, ja, from);
sectors = ja->sectors_free;
/*
* We that we don't allocate the space for a journal entry
* until we write it out - thus, account for it here:
*/
for (seq = journal_last_unwritten_seq(j);
seq <= journal_cur_seq(j);
seq++) {
unwritten = j->buf[seq & JOURNAL_BUF_MASK].sectors;
if (!unwritten)
continue;
/* entry won't fit on this device, skip: */
if (unwritten > ca->mi.bucket_size)
continue;
if (unwritten >= sectors) {
if (!buckets) {
sectors = 0;
break;
}
buckets--;
sectors = ca->mi.bucket_size;
}
sectors -= unwritten;
}
if (sectors < ca->mi.bucket_size && buckets) {
buckets--;
sectors = ca->mi.bucket_size;
}
return (struct journal_space) {
.next_entry = sectors,
.total = sectors + buckets * ca->mi.bucket_size,
};
}
static struct journal_space __journal_space_available(struct journal *j, unsigned nr_devs_want,
enum journal_space_from from)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned pos, nr_devs = 0;
struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
unsigned min_bucket_size = U32_MAX;
BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
rcu_read_lock();
for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) {
if (!ca->journal.nr ||
!ca->mi.durability)
continue;
min_bucket_size = min(min_bucket_size, ca->mi.bucket_size);
space = journal_dev_space_available(j, ca, from);
if (!space.next_entry)
continue;
for (pos = 0; pos < nr_devs; pos++)
if (space.total > dev_space[pos].total)
break;
array_insert_item(dev_space, nr_devs, pos, space);
}
rcu_read_unlock();
if (nr_devs < nr_devs_want)
return (struct journal_space) { 0, 0 };
/*