From c39de951282df9a60ef70664e4378d88006b2670 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:04:53 +0300 Subject: [PATCH 0001/1405] fs/ntfs3: Improve alternative boot processing Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 9153dffde950c..09d61c6c90aaf 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -866,6 +866,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, u16 fn, ao; u8 cluster_bits; u32 boot_off = 0; + sector_t boot_block = 0; const char *hint = "Primary boot"; /* Save original dev_size. Used with alternative boot. */ @@ -873,11 +874,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, sbi->volume.blocks = dev_size >> PAGE_SHIFT; - bh = ntfs_bread(sb, 0); +read_boot: + bh = ntfs_bread(sb, boot_block); if (!bh) - return -EIO; + return boot_block ? -EINVAL : -EIO; -check_boot: err = -EINVAL; /* Corrupted image; do not read OOB */ @@ -1108,26 +1109,24 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, } out: - if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) { + brelse(bh); + + if (err == -EINVAL && !boot_block && dev_size0 > PAGE_SHIFT) { u32 block_size = min_t(u32, sector_size, PAGE_SIZE); u64 lbo = dev_size0 - sizeof(*boot); - /* - * Try alternative boot (last sector) - */ - brelse(bh); - - sb_set_blocksize(sb, block_size); - bh = ntfs_bread(sb, lbo >> blksize_bits(block_size)); - if (!bh) - return -EINVAL; - + boot_block = lbo >> blksize_bits(block_size); boot_off = lbo & (block_size - 1); - hint = "Alternative boot"; - dev_size = dev_size0; /* restore original size. */ - goto check_boot; + if (boot_block && block_size >= boot_off + sizeof(*boot)) { + /* + * Try alternative boot (last sector) + */ + sb_set_blocksize(sb, block_size); + hint = "Alternative boot"; + dev_size = dev_size0; /* restore original size. */ + goto read_boot; + } } - brelse(bh); return err; } -- GitLab From 22457c047ed971f2f2e33be593ddfabd9639a409 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:17:07 +0300 Subject: [PATCH 0002/1405] fs/ntfs3: Modified fix directory element type detection Unfortunately reparse attribute is used for many purposes (several dozens). It is not possible here to know is this name symlink or not. To get exactly the type of name we should to open inode (read mft). getattr for opened file (fstat) correctly returns symlink. Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index ec0566b322d5d..22ede4da04502 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + } return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } -- GitLab From 6a799c928b78b14999b7705c4cca0f88e297fe96 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:24:33 +0300 Subject: [PATCH 0003/1405] fs/ntfs3: Improve ntfs_dir_count Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 22ede4da04502..726122ecd39b4 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -515,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; if (is_empty) *is_empty = true; @@ -563,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; } - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_used_bit(&ni->dir, ni, &bit); @@ -573,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out; - vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out; err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -584,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; } out: -- GitLab From 1918c10e137eae266b8eb0ab1cc14421dcb0e3e2 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:26:31 +0300 Subject: [PATCH 0004/1405] fs/ntfs3: Correct hard links updating when dealing with DOS names Signed-off-by: Konstantin Komarov --- fs/ntfs3/record.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 53629b1f65e99..7b6423584eaee 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -535,8 +535,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false; if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } } used -= asize; -- GitLab From 85ba2a75faee759809a7e43b4c103ac59bac1026 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:34:24 +0300 Subject: [PATCH 0005/1405] fs/ntfs3: Print warning while fixing hard links count Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 5e3d713749185..fa6c7965473c8 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -412,7 +412,6 @@ static struct inode *ntfs_read_mft(struct inode *inode, goto out; if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -427,6 +426,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; } -- GitLab From 865e7a7700d930d34895a70f8af2eb4e778a5b0e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:42:04 +0300 Subject: [PATCH 0006/1405] fs/ntfs3: Reduce stack usage Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 218 +++++++++++++++++++++-------------------------- 1 file changed, 98 insertions(+), 120 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 98ccb66508583..7dbb000fc6911 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -974,6 +974,16 @@ static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo) return e; } +struct restart_info { + u64 last_lsn; + struct RESTART_HDR *r_page; + u32 vbo; + bool chkdsk_was_run; + bool valid_page; + bool initialized; + bool restart; +}; + #define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001) #define NTFSLOG_WRAPPED 0x00000001 @@ -987,6 +997,7 @@ struct ntfs_log { struct ntfs_inode *ni; u32 l_size; + u32 orig_file_size; u32 sys_page_size; u32 sys_page_mask; u32 page_size; @@ -1040,6 +1051,8 @@ struct ntfs_log { struct CLIENT_ID client_id; u32 client_undo_commit; + + struct restart_info rst_info, rst_info2; }; static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn) @@ -1105,16 +1118,6 @@ static inline bool verify_client_lsn(struct ntfs_log *log, lsn <= le64_to_cpu(log->ra->current_lsn) && lsn; } -struct restart_info { - u64 last_lsn; - struct RESTART_HDR *r_page; - u32 vbo; - bool chkdsk_was_run; - bool valid_page; - bool initialized; - bool restart; -}; - static int read_log_page(struct ntfs_log *log, u32 vbo, struct RECORD_PAGE_HDR **buffer, bool *usa_error) { @@ -1176,7 +1179,7 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, * restart page header. It will stop the first time we find a * valid page header. */ -static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, +static int log_read_rst(struct ntfs_log *log, bool first, struct restart_info *info) { u32 skip, vbo; @@ -1192,7 +1195,7 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, } /* Loop continuously until we succeed. */ - for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) { + for (; vbo < log->l_size; vbo = 2 * vbo + skip, skip = 0) { bool usa_error; bool brst, bchk; struct RESTART_AREA *ra; @@ -1285,22 +1288,17 @@ static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, /* * Ilog_init_pg_hdr - Init @log from restart page header. */ -static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, - u32 page_size, u16 major_ver, u16 minor_ver) +static void log_init_pg_hdr(struct ntfs_log *log, u16 major_ver, u16 minor_ver) { - log->sys_page_size = sys_page_size; - log->sys_page_mask = sys_page_size - 1; - log->page_size = page_size; - log->page_mask = page_size - 1; - log->page_bits = blksize_bits(page_size); + log->sys_page_size = log->page_size; + log->sys_page_mask = log->page_mask; log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits; if (!log->clst_per_page) log->clst_per_page = 1; - log->first_page = major_ver >= 2 ? - 0x22 * page_size : - ((sys_page_size << 1) + (page_size << 1)); + log->first_page = major_ver >= 2 ? 0x22 * log->page_size : + 4 * log->page_size; log->major_ver = major_ver; log->minor_ver = minor_ver; } @@ -1308,12 +1306,11 @@ static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, /* * log_create - Init @log in cases when we don't have a restart area to use. */ -static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn, +static void log_create(struct ntfs_log *log, const u64 last_lsn, u32 open_log_count, bool wrapped, bool use_multi_page) { - log->l_size = l_size; /* All file offsets must be quadword aligned. */ - log->file_data_bits = blksize_bits(l_size) - 3; + log->file_data_bits = blksize_bits(log->l_size) - 3; log->seq_num_mask = (8 << log->file_data_bits) - 1; log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits; log->seq_num = (last_lsn >> log->file_data_bits) + 2; @@ -3720,10 +3717,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct ntfs_sb_info *sbi = ni->mi.sbi; struct ntfs_log *log; - struct restart_info rst_info, rst_info2; - u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0; + u64 rec_lsn, checkpt_lsn = 0, rlsn = 0; struct ATTR_NAME_ENTRY *attr_names = NULL; - struct ATTR_NAME_ENTRY *ane; struct RESTART_TABLE *dptbl = NULL; struct RESTART_TABLE *trtbl = NULL; const struct RESTART_TABLE *rt; @@ -3741,9 +3736,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) struct TRANSACTION_ENTRY *tr; struct DIR_PAGE_ENTRY *dp; u32 i, bytes_per_attr_entry; - u32 l_size = ni->vfs_inode.i_size; - u32 orig_file_size = l_size; - u32 page_size, vbo, tail, off, dlen; + u32 vbo, tail, off, dlen; u32 saved_len, rec_len, transact_id; bool use_second_page; struct RESTART_AREA *ra2, *ra = NULL; @@ -3758,52 +3751,50 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) u16 t16; u32 t32; - /* Get the size of page. NOTE: To replay we can use default page. */ -#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2 - page_size = norm_file_page(PAGE_SIZE, &l_size, true); -#else - page_size = norm_file_page(PAGE_SIZE, &l_size, false); -#endif - if (!page_size) - return -EINVAL; - log = kzalloc(sizeof(struct ntfs_log), GFP_NOFS); if (!log) return -ENOMEM; log->ni = ni; - log->l_size = l_size; - log->one_page_buf = kmalloc(page_size, GFP_NOFS); + log->l_size = log->orig_file_size = ni->vfs_inode.i_size; + + /* Get the size of page. NOTE: To replay we can use default page. */ +#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2 + log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, true); +#else + log->page_size = norm_file_page(PAGE_SIZE, &log->l_size, false); +#endif + if (!log->page_size) { + err = -EINVAL; + goto out; + } + log->one_page_buf = kmalloc(log->page_size, GFP_NOFS); if (!log->one_page_buf) { err = -ENOMEM; goto out; } - log->page_size = page_size; - log->page_mask = page_size - 1; - log->page_bits = blksize_bits(page_size); + log->page_mask = log->page_size - 1; + log->page_bits = blksize_bits(log->page_size); /* Look for a restart area on the disk. */ - memset(&rst_info, 0, sizeof(struct restart_info)); - err = log_read_rst(log, l_size, true, &rst_info); + err = log_read_rst(log, true, &log->rst_info); if (err) goto out; /* remember 'initialized' */ - *initialized = rst_info.initialized; + *initialized = log->rst_info.initialized; - if (!rst_info.restart) { - if (rst_info.initialized) { + if (!log->rst_info.restart) { + if (log->rst_info.initialized) { /* No restart area but the file is not initialized. */ err = -EINVAL; goto out; } - log_init_pg_hdr(log, page_size, page_size, 1, 1); - log_create(log, l_size, 0, get_random_u32(), false, false); - - log->ra = ra; + log_init_pg_hdr(log, 1, 1); + log_create(log, 0, get_random_u32(), false, false); ra = log_create_ra(log); if (!ra) { @@ -3820,25 +3811,26 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) * If the restart offset above wasn't zero then we won't * look for a second restart. */ - if (rst_info.vbo) + if (log->rst_info.vbo) goto check_restart_area; - memset(&rst_info2, 0, sizeof(struct restart_info)); - err = log_read_rst(log, l_size, false, &rst_info2); + err = log_read_rst(log, false, &log->rst_info2); if (err) goto out; /* Determine which restart area to use. */ - if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn) + if (!log->rst_info2.restart || + log->rst_info2.last_lsn <= log->rst_info.last_lsn) goto use_first_page; use_second_page = true; - if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) { + if (log->rst_info.chkdsk_was_run && + log->page_size != log->rst_info.vbo) { struct RECORD_PAGE_HDR *sp = NULL; bool usa_error; - if (!read_log_page(log, page_size, &sp, &usa_error) && + if (!read_log_page(log, log->page_size, &sp, &usa_error) && sp->rhdr.sign == NTFS_CHKD_SIGNATURE) { use_second_page = false; } @@ -3846,52 +3838,43 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) } if (use_second_page) { - kfree(rst_info.r_page); - memcpy(&rst_info, &rst_info2, sizeof(struct restart_info)); - rst_info2.r_page = NULL; + kfree(log->rst_info.r_page); + memcpy(&log->rst_info, &log->rst_info2, + sizeof(struct restart_info)); + log->rst_info2.r_page = NULL; } use_first_page: - kfree(rst_info2.r_page); + kfree(log->rst_info2.r_page); check_restart_area: /* * If the restart area is at offset 0, we want * to write the second restart area first. */ - log->init_ra = !!rst_info.vbo; + log->init_ra = !!log->rst_info.vbo; /* If we have a valid page then grab a pointer to the restart area. */ - ra2 = rst_info.valid_page ? - Add2Ptr(rst_info.r_page, - le16_to_cpu(rst_info.r_page->ra_off)) : + ra2 = log->rst_info.valid_page ? + Add2Ptr(log->rst_info.r_page, + le16_to_cpu(log->rst_info.r_page->ra_off)) : NULL; - if (rst_info.chkdsk_was_run || + if (log->rst_info.chkdsk_was_run || (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) { bool wrapped = false; bool use_multi_page = false; u32 open_log_count; /* Do some checks based on whether we have a valid log page. */ - if (!rst_info.valid_page) { - open_log_count = get_random_u32(); - goto init_log_instance; - } - open_log_count = le32_to_cpu(ra2->open_log_count); - - /* - * If the restart page size isn't changing then we want to - * check how much work we need to do. - */ - if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size)) - goto init_log_instance; + open_log_count = log->rst_info.valid_page ? + le32_to_cpu(ra2->open_log_count) : + get_random_u32(); -init_log_instance: - log_init_pg_hdr(log, page_size, page_size, 1, 1); + log_init_pg_hdr(log, 1, 1); - log_create(log, l_size, rst_info.last_lsn, open_log_count, - wrapped, use_multi_page); + log_create(log, log->rst_info.last_lsn, open_log_count, wrapped, + use_multi_page); ra = log_create_ra(log); if (!ra) { @@ -3916,28 +3899,27 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) * use the log file. We must use the system page size instead of the * default size if there is not a clean shutdown. */ - t32 = le32_to_cpu(rst_info.r_page->sys_page_size); - if (page_size != t32) { - l_size = orig_file_size; - page_size = - norm_file_page(t32, &l_size, t32 == DefaultLogPageSize); + t32 = le32_to_cpu(log->rst_info.r_page->sys_page_size); + if (log->page_size != t32) { + log->l_size = log->orig_file_size; + log->page_size = norm_file_page(t32, &log->l_size, + t32 == DefaultLogPageSize); } - if (page_size != t32 || - page_size != le32_to_cpu(rst_info.r_page->page_size)) { + if (log->page_size != t32 || + log->page_size != le32_to_cpu(log->rst_info.r_page->page_size)) { err = -EINVAL; goto out; } /* If the file size has shrunk then we won't mount it. */ - if (l_size < le64_to_cpu(ra2->l_size)) { + if (log->l_size < le64_to_cpu(ra2->l_size)) { err = -EINVAL; goto out; } - log_init_pg_hdr(log, page_size, page_size, - le16_to_cpu(rst_info.r_page->major_ver), - le16_to_cpu(rst_info.r_page->minor_ver)); + log_init_pg_hdr(log, le16_to_cpu(log->rst_info.r_page->major_ver), + le16_to_cpu(log->rst_info.r_page->minor_ver)); log->l_size = le64_to_cpu(ra2->l_size); log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits); @@ -3945,7 +3927,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) log->seq_num_mask = (8 << log->file_data_bits) - 1; log->last_lsn = le64_to_cpu(ra2->current_lsn); log->seq_num = log->last_lsn >> log->file_data_bits; - log->ra_off = le16_to_cpu(rst_info.r_page->ra_off); + log->ra_off = le16_to_cpu(log->rst_info.r_page->ra_off); log->restart_size = log->sys_page_size - log->ra_off; log->record_header_len = le16_to_cpu(ra2->rec_hdr_len); log->ra_size = le16_to_cpu(ra2->ra_len); @@ -4045,7 +4027,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) log->current_avail = current_log_avail(log); /* Remember which restart area to write first. */ - log->init_ra = rst_info.vbo; + log->init_ra = log->rst_info.vbo; process_log: /* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values. */ @@ -4105,7 +4087,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) log->client_id.seq_num = cr->seq_num; log->client_id.client_idx = client; - err = read_rst_area(log, &rst, &ra_lsn); + err = read_rst_area(log, &rst, &checkpt_lsn); if (err) goto out; @@ -4114,9 +4096,8 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28; - checkpt_lsn = le64_to_cpu(rst->check_point_start); - if (!checkpt_lsn) - checkpt_lsn = ra_lsn; + if (rst->check_point_start) + checkpt_lsn = le64_to_cpu(rst->check_point_start); /* Allocate and Read the Transaction Table. */ if (!rst->transact_table_len) @@ -4330,23 +4311,20 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) lcb = NULL; check_attribute_names2: - if (!rst->attr_names_len) - goto trace_attribute_table; - - ane = attr_names; - if (!oatbl) - goto trace_attribute_table; - while (ane->off) { - /* TODO: Clear table on exit! */ - oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); - t16 = le16_to_cpu(ane->name_bytes); - oe->name_len = t16 / sizeof(short); - oe->ptr = ane->name; - oe->is_attr_name = 2; - ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16); - } - -trace_attribute_table: + if (rst->attr_names_len && oatbl) { + struct ATTR_NAME_ENTRY *ane = attr_names; + while (ane->off) { + /* TODO: Clear table on exit! */ + oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); + t16 = le16_to_cpu(ane->name_bytes); + oe->name_len = t16 / sizeof(short); + oe->ptr = ane->name; + oe->is_attr_name = 2; + ane = Add2Ptr(ane, + sizeof(struct ATTR_NAME_ENTRY) + t16); + } + } + /* * If the checkpt_lsn is zero, then this is a freshly * formatted disk and we have no work to do. @@ -5189,7 +5167,7 @@ int log_replay(struct ntfs_inode *ni, bool *initialized) kfree(oatbl); kfree(dptbl); kfree(attr_names); - kfree(rst_info.r_page); + kfree(log->rst_info.r_page); kfree(ra); kfree(log->one_page_buf); -- GitLab From a8b0c9fc3a2dba07f697ef7825e04363ff12f071 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:46:16 +0300 Subject: [PATCH 0007/1405] fs/ntfs3: Fix multithreaded stress test Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 63f70259edc0d..4b78b669a3bdb 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -886,7 +886,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, struct runs_tree *run = &ni->file.run; struct ntfs_sb_info *sbi; u8 cluster_bits; - struct ATTRIB *attr = NULL, *attr_b; + struct ATTRIB *attr, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; @@ -904,12 +904,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, *len = 0; up_read(&ni->file.run_lock); - if (*len) { - if (*lcn != SPARSE_LCN || !new) - return 0; /* Fast normal way without allocation. */ - else if (clen > *len) - clen = *len; - } + if (*len && (*lcn != SPARSE_LCN || !new)) + return 0; /* Fast normal way without allocation. */ /* No cluster in cache or we need to allocate cluster in hole. */ sbi = ni->mi.sbi; @@ -918,6 +914,17 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, ni_lock(ni); down_write(&ni->file.run_lock); + /* Repeat the code above (under write lock). */ + if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + *len = 0; + + if (*len) { + if (*lcn != SPARSE_LCN || !new) + goto out; /* normal way without allocation. */ + if (clen > *len) + clen = *len; + } + le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); if (!attr_b) { -- GitLab From d155617006ebc172a80d3eb013c4b867f9a8ada4 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 11:47:30 +0300 Subject: [PATCH 0008/1405] fs/ntfs3: Fix detected field-spanning write (size 8) of single field "le->name" Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 86aecbb01a92f..13e96fc63dae5 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -523,7 +523,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. }; // sizeof(0x20) -- GitLab From a40b73f608e7de2120fdb9ddc8970421b3c50bc9 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:08:36 +0300 Subject: [PATCH 0009/1405] fs/ntfs3: Correct use bh_read Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 19 +++++++++---------- fs/ntfs3/inode.c | 7 +++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index a5a30a24ce5df..5691f04e6751a 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -188,6 +188,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) u32 bh_next, bh_off, to; sector_t iblock; struct folio *folio; + bool dirty = false; for (; idx < idx_end; idx += 1, from = 0) { page_off = (loff_t)idx << PAGE_SHIFT; @@ -223,29 +224,27 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) /* Ok, it's mapped. Make sure it's up-to-date. */ if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); - - if (!buffer_uptodate(bh)) { - err = bh_read(bh, 0); - if (err < 0) { - folio_unlock(folio); - folio_put(folio); - goto out; - } + else if (bh_read(bh, 0) < 0) { + err = -EIO; + folio_unlock(folio); + folio_put(folio); + goto out; } mark_buffer_dirty(bh); - } while (bh_off = bh_next, iblock += 1, head != (bh = bh->b_this_page)); folio_zero_segment(folio, from, to); + dirty = true; folio_unlock(folio); folio_put(folio); cond_resched(); } out: - mark_inode_dirty(inode); + if (dirty) + mark_inode_dirty(inode); return err; } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index fa6c7965473c8..bba0208c4afde 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -345,9 +345,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, inode->i_size = le16_to_cpu(rp.SymbolicLinkReparseBuffer .PrintNameLength) / sizeof(u16); - ni->i_valid = inode->i_size; - /* Clear directory bit. */ if (ni->ni_flags & NI_FLAG_DIR) { indx_clear(&ni->dir); @@ -653,9 +651,10 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, off = vbo & (PAGE_SIZE - 1); folio_set_bh(bh, folio, off); - err = bh_read(bh, 0); - if (err < 0) + if (bh_read(bh, 0) < 0) { + err = -EIO; goto out; + } folio_zero_segment(folio, off + voff, off + block_size); } } -- GitLab From 4dea9cd522424d3002894c20b729c6fbfb6fc22b Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:16:49 +0300 Subject: [PATCH 0010/1405] fs/ntfs3: Add file_modified Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5691f04e6751a..bb80ce2eec2f7 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -632,11 +632,17 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) &ni->file.run, i_size, &ni->i_valid, true, NULL); ni_unlock(ni); + if (err) + goto out; } else if (new_size > i_size) { inode->i_size = new_size; } } + err = file_modified(file); + if (err) + goto out; + out: if (map_locked) filemap_invalidate_unlock(mapping); @@ -1040,6 +1046,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; + int err; struct ntfs_inode *ni = ntfs_i(inode); if (is_encrypted(ni)) { @@ -1067,6 +1074,12 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret <= 0) goto out; + err = file_modified(iocb->ki_filp); + if (err) { + ret = err; + goto out; + } + if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { /* Should never be here, see ntfs_file_open(). */ ret = -EOPNOTSUPP; -- GitLab From e50f9560b8168a625703a3e7fe1fde9fa53f0837 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:17:46 +0300 Subject: [PATCH 0011/1405] fs/ntfs3: Drop suid and sgid bits as a part of fpunch Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index bb80ce2eec2f7..0ff5d3af28897 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -498,10 +498,14 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_punch_hole(ni, vbo, len, &frame_size); ni_unlock(ni); + if (!err) + goto ok; + if (err != E_NTFS_NOTALIGNED) goto out; /* Process not aligned punch. */ + err = 0; mask = frame_size - 1; vbo_a = (vbo + mask) & ~mask; end_a = end & ~mask; @@ -524,6 +528,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); ni_unlock(ni); + if (err) + goto out; } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { /* @@ -563,6 +569,8 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) ni_lock(ni); err = attr_insert_range(ni, vbo, len); ni_unlock(ni); + if (err) + goto out; } else { /* Check new size. */ u8 cluster_bits = sbi->cluster_bits; @@ -639,6 +647,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) } } +ok: err = file_modified(file); if (err) goto out; -- GitLab From 6c3684e703837d2116b5cf4beb37aa7145a66b60 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:19:37 +0300 Subject: [PATCH 0012/1405] fs/ntfs3: Implement super_operations::shutdown Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 18 ++++++++++++++++++ fs/ntfs3/frecord.c | 3 +++ fs/ntfs3/inode.c | 21 +++++++++++++++++++-- fs/ntfs3/namei.c | 12 ++++++++++++ fs/ntfs3/ntfs_fs.h | 9 ++++++++- fs/ntfs3/super.c | 12 ++++++++++++ fs/ntfs3/xattr.c | 3 +++ 7 files changed, 75 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 0ff5d3af28897..07ed3d946e7c5 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -260,6 +260,9 @@ static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) bool rw = vma->vm_flags & VM_WRITE; int err; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "mmap encrypted not supported"); return -EOPNOTSUPP; @@ -677,6 +680,9 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode = inode->i_mode; int err; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + err = setattr_prepare(idmap, dentry, attr); if (err) goto out; @@ -732,6 +738,9 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = file->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -766,6 +775,9 @@ static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -1058,6 +1070,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) int err; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (is_encrypted(ni)) { ntfs_inode_warn(inode, "encrypted i/o not supported"); return -EOPNOTSUPP; @@ -1118,6 +1133,9 @@ int ntfs_file_open(struct inode *inode, struct file *file) { struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + if (unlikely((is_compressed(ni) || is_encrypted(ni)) && (file->f_flags & O_DIRECT))) { return -EOPNOTSUPP; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 3df2d9e34b914..8744ba36d4222 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3259,6 +3259,9 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) if (is_bad_inode(inode) || sb_rdonly(sb)) return 0; + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + if (!ni_trylock(ni)) { /* 'ni' is under modification, skip for now. */ mark_inode_dirty_sync(inode); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index bba0208c4afde..85452a6b1d40a 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -852,9 +852,13 @@ static int ntfs_resident_writepage(struct folio *folio, struct writeback_control *wbc, void *data) { struct address_space *mapping = data; - struct ntfs_inode *ni = ntfs_i(mapping->host); + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); int ret; + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + ni_lock(ni); ret = attr_data_write_resident(ni, &folio->page); ni_unlock(ni); @@ -868,7 +872,12 @@ static int ntfs_resident_writepage(struct folio *folio, static int ntfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - if (is_resident(ntfs_i(mapping->host))) + struct inode *inode = mapping->host; + + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + + if (is_resident(ntfs_i(inode))) return write_cache_pages(mapping, wbc, ntfs_resident_writepage, mapping); return mpage_writepages(mapping, wbc, ntfs_get_block); @@ -888,6 +897,9 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + *pagep = NULL; if (is_resident(ni)) { struct page *page = @@ -1305,6 +1317,11 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, goto out1; } + if (unlikely(ntfs3_forced_shutdown(sb))) { + err = -EIO; + goto out2; + } + /* Mark rw ntfs as dirty. it will be cleared at umount. */ ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index ee3093be51701..cae41db0aaa7d 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -181,6 +181,9 @@ static int ntfs_unlink(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + ni_lock_dir(ni); err = ntfs_unlink_inode(dir, dentry); @@ -199,6 +202,9 @@ static int ntfs_symlink(struct mnt_idmap *idmap, struct inode *dir, u32 size = strlen(symname); struct inode *inode; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + inode = ntfs_create_inode(idmap, dir, dentry, NULL, S_IFLNK | 0777, 0, symname, size, NULL); @@ -227,6 +233,9 @@ static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) struct ntfs_inode *ni = ntfs_i(dir); int err; + if (unlikely(ntfs3_forced_shutdown(dir->i_sb))) + return -EIO; + ni_lock_dir(ni); err = ntfs_unlink_inode(dir, dentry); @@ -264,6 +273,9 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir, 1024); static_assert(PATH_MAX >= 4 * 1024); + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + if (flags & ~RENAME_NOREPLACE) return -EINVAL; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index f6706143d14bc..d40bc7669ae58 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -61,6 +61,8 @@ enum utf16_endian; /* sbi->flags */ #define NTFS_FLAGS_NODISCARD 0x00000001 +/* ntfs in shutdown state. */ +#define NTFS_FLAGS_SHUTDOWN 0x00000002 /* Set when LogFile is replaying. */ #define NTFS_FLAGS_LOG_REPLAYING 0x00000008 /* Set when we changed first MFT's which copy must be updated in $MftMirr. */ @@ -226,7 +228,7 @@ struct ntfs_sb_info { u64 maxbytes; // Maximum size for normal files. u64 maxbytes_sparse; // Maximum size for sparse file. - u32 flags; // See NTFS_FLAGS_XXX. + unsigned long flags; // See NTFS_FLAGS_ CLST zone_max; // Maximum MFT zone length in clusters CLST bad_clusters; // The count of marked bad clusters. @@ -999,6 +1001,11 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb) return sb->s_fs_info; } +static inline bool ntfs3_forced_shutdown(struct super_block *sb) +{ + return test_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); +} + /* * ntfs_up_cluster - Align up on cluster boundary. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 09d61c6c90aaf..af8521a6ed954 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -714,6 +714,14 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) return 0; } +/* + * ntfs_shutdown - super_operations::shutdown + */ +static void ntfs_shutdown(struct super_block *sb) +{ + set_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); +} + /* * ntfs_sync_fs - super_operations::sync_fs */ @@ -724,6 +732,9 @@ static int ntfs_sync_fs(struct super_block *sb, int wait) struct ntfs_inode *ni; struct inode *inode; + if (unlikely(ntfs3_forced_shutdown(sb))) + return -EIO; + ni = sbi->security.ni; if (ni) { inode = &ni->vfs_inode; @@ -763,6 +774,7 @@ static const struct super_operations ntfs_sops = { .put_super = ntfs_put_super, .statfs = ntfs_statfs, .show_options = ntfs_show_options, + .shutdown = ntfs_shutdown, .sync_fs = ntfs_sync_fs, .write_inode = ntfs3_write_inode, }; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 4274b6f31cfa1..071356d096d83 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -744,6 +744,9 @@ static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, int err; struct ntfs_inode *ni = ntfs_i(inode); + if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + return -EIO; + /* Dispatch request. */ if (!strcmp(name, SYSTEM_DOS_ATTRIB)) { /* system.dos_attrib */ -- GitLab From 97ec56d390a3a0077b36cb38627f671c72dddce6 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 24 Nov 2023 12:21:12 +0300 Subject: [PATCH 0013/1405] fs/ntfs3: ntfs3_forced_shutdown use int instead of bool Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 3 ++- fs/ntfs3/ntfs_fs.h | 6 +++--- fs/ntfs3/super.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index fbfe21dbb4259..350461d8cece5 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -853,7 +853,8 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) /* * sb can be NULL here. In this case sbi->flags should be 0 too. */ - if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR)) + if (!sb || !(sbi->flags & NTFS_FLAGS_MFTMIRR) || + unlikely(ntfs3_forced_shutdown(sb))) return; blocksize = sb->s_blocksize; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index d40bc7669ae58..7510875efef6a 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -62,7 +62,7 @@ enum utf16_endian; /* sbi->flags */ #define NTFS_FLAGS_NODISCARD 0x00000001 /* ntfs in shutdown state. */ -#define NTFS_FLAGS_SHUTDOWN 0x00000002 +#define NTFS_FLAGS_SHUTDOWN_BIT 0x00000002 /* == 4*/ /* Set when LogFile is replaying. */ #define NTFS_FLAGS_LOG_REPLAYING 0x00000008 /* Set when we changed first MFT's which copy must be updated in $MftMirr. */ @@ -1001,9 +1001,9 @@ static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb) return sb->s_fs_info; } -static inline bool ntfs3_forced_shutdown(struct super_block *sb) +static inline int ntfs3_forced_shutdown(struct super_block *sb) { - return test_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); + return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } /* diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index af8521a6ed954..65ef4b57411f0 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -719,7 +719,7 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) */ static void ntfs_shutdown(struct super_block *sb) { - set_bit(NTFS_FLAGS_SHUTDOWN, &ntfs_sb(sb)->flags); + set_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } /* -- GitLab From d6ca2d253900b9b0a3a1ad77541d606010f5e5eb Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:08:00 +0300 Subject: [PATCH 0014/1405] fs/ntfs3: Add and fix comments Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 4 +++- fs/ntfs3/fsntfs.c | 2 +- fs/ntfs3/ntfs.h | 2 +- fs/ntfs3/ntfs_fs.h | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 726122ecd39b4..9f6dd445eb04d 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -536,8 +536,10 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || - off + e_size > end) + off + e_size > end) { + /* Looks like corruption. */ break; + } if (de_is_last(e)) break; diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 350461d8cece5..321978019407f 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2129,8 +2129,8 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, if (le32_to_cpu(d_security->size) == new_sec_size && d_security->key.hash == hash_key.hash && !memcmp(d_security + 1, sd, size_sd)) { - *security_id = d_security->key.sec_id; /* Such security already exists. */ + *security_id = d_security->key.sec_id; err = 0; goto out; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 13e96fc63dae5..c8981429c7213 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -523,7 +523,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[]; // 0x1A: Just to align. To get real name can use name_off. + __le16 name[]; // 0x1A: To get real name use name_off. }; // sizeof(0x20) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 7510875efef6a..abbc7182554aa 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -874,7 +874,7 @@ int ntfs_init_acl(struct mnt_idmap *idmap, struct inode *inode, int ntfs_acl_chmod(struct mnt_idmap *idmap, struct dentry *dentry); ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -extern const struct xattr_handler * const ntfs_xattr_handlers[]; +extern const struct xattr_handler *const ntfs_xattr_handlers[]; int ntfs_save_wsl_perm(struct inode *inode, __le16 *ea_size); void ntfs_get_wsl_perm(struct inode *inode); -- GitLab From aaab47f204aaf47838241d57bf8662c8840de60a Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:09:34 +0300 Subject: [PATCH 0015/1405] fs/ntfs3: Add NULL ptr dereference checking at the end of attr_allocate_frame() It is preferable to exit through the out: label because internal debugging functions are located there. Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 4b78b669a3bdb..646e2dad1b757 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1743,8 +1743,10 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + } attr = attr_b; le = le_b; @@ -1825,13 +1827,15 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size; - valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } } return err; -- GitLab From 652483bfbc45137e8dce556c9ddbd4458dad4452 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 28 Nov 2023 11:17:20 +0300 Subject: [PATCH 0016/1405] fs/ntfs3: Fix c/mtime typo Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 8744ba36d4222..6ff4f70ba0775 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3291,7 +3291,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint) modified = true; } - ts = inode_get_mtime(inode); + ts = inode_get_ctime(inode); dup.c_time = kernel2nt(&ts); if (std->c_time != dup.c_time) { std->c_time = dup.c_time; -- GitLab From fe752331d4b361d43cfd0b89534b4b2176057c32 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 20 Dec 2023 13:53:17 +0100 Subject: [PATCH 0017/1405] KVM: s390: vsie: fix race during shadow creation Right now it is possible to see gmap->private being zero in kvm_s390_vsie_gmap_notifier resulting in a crash. This is due to the fact that we add gmap->private == kvm after creation: static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { [...] gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); gmap->private = vcpu->kvm; Let children inherit the private field of the parent. Reported-by: Marc Hartmayer Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Cc: Cc: David Hildenbrand Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Signed-off-by: Christian Borntraeger Link: https://lore.kernel.org/r/20231220125317.4258-1-borntraeger@linux.ibm.com --- arch/s390/kvm/vsie.c | 1 - arch/s390/mm/gmap.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 8207a892bbe22..db9a180de65f1 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1220,7 +1220,6 @@ static int acquire_gmap_shadow(struct kvm_vcpu *vcpu, gmap = gmap_shadow(vcpu->arch.gmap, asce, edat); if (IS_ERR(gmap)) return PTR_ERR(gmap); - gmap->private = vcpu->kvm; vcpu->kvm->stat.gmap_shadow_create++; WRITE_ONCE(vsie_page->gmap, gmap); return 0; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 6f96b5a71c638..8da39deb56ca4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1691,6 +1691,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, return ERR_PTR(-ENOMEM); new->mm = parent->mm; new->parent = gmap_get(parent); + new->private = parent->private; new->orig_asce = asce; new->edat_level = edat_level; new->initialized = false; -- GitLab From 4cdfb6e7bc9c80142d33bf1d4653a73fa678ba56 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 21 Dec 2023 13:59:43 +0300 Subject: [PATCH 0018/1405] fs/ntfs3: Disable ATTR_LIST_ENTRY size check The use of sizeof(struct ATTR_LIST_ENTRY) has been replaced with le_size(0) due to alignment peculiarities on different platforms. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312071005.g6YrbaIe-lkp@intel.com/ Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 8 ++++---- fs/ntfs3/ntfs.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 7c01735d1219d..48e7da47c6b71 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0); if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, /* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size); /* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index c8981429c7213..9c7478150a035 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -527,8 +527,6 @@ struct ATTR_LIST_ENTRY { }; // sizeof(0x20) -static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) + -- GitLab From 83303a4c776ce1032d88df59e811183479acea77 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 1 Dec 2023 19:16:57 +0100 Subject: [PATCH 0019/1405] KVM: s390: fix cc for successful PQAP The various errors that are possible when processing a PQAP instruction (the absence of a driver hook, an error FROM that hook), all correctly set the PSW condition code to 3. But if that processing works successfully, CC0 needs to be set to convey that everything was fine. Fix the check so that the guest can examine the condition code to determine whether GPR1 has meaningful data. Fixes: e5282de93105 ("s390: ap: kvm: add PQAP interception for AQIC") Signed-off-by: Eric Farman Reviewed-by: Tony Krowiak Reviewed-by: Halil Pasic Link: https://lore.kernel.org/r/20231201181657.1614645-1-farman@linux.ibm.com Signed-off-by: Janosch Frank Message-Id: <20231201181657.1614645-1-farman@linux.ibm.com> Signed-off-by: Christian Borntraeger --- arch/s390/kvm/priv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 621a17fd1a1bb..f875a404a0a02 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -676,8 +676,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.crypto.pqap_hook) { pqap_hook = *vcpu->kvm->arch.crypto.pqap_hook; ret = pqap_hook(vcpu); - if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) - kvm_s390_set_psw_cc(vcpu, 3); + if (!ret) { + if (vcpu->run->s.regs.gprs[1] & 0x00ff0000) + kvm_s390_set_psw_cc(vcpu, 3); + else + kvm_s390_set_psw_cc(vcpu, 0); + } up_read(&vcpu->kvm->arch.crypto.pqap_hook_rwsem); return ret; } -- GitLab From 85e985a4f46e462a37f1875cb74ed380e7c0c2e0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 10 Jan 2024 15:16:26 +0200 Subject: [PATCH 0020/1405] interconnect: qcom: sc8180x: Mark CO0 BCM keepalive The CO0 BCM needs to be up at all times, otherwise some hardware (like the UFS controller) loses its connection to the rest of the SoC, resulting in a hang of the platform, accompanied by a spectacular logspam. Mark it as keepalive to prevent such cases. Fixes: 9c8c6bac1ae8 ("interconnect: qcom: Add SC8180x providers") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231214-topic-sc8180_fixes-v1-1-421904863006@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sc8180x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 20331e119beb6..03d626776ba17 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1372,6 +1372,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; -- GitLab From 24406f6794aa631516241deb9e19de333d6a0600 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 10 Jan 2024 15:16:46 +0200 Subject: [PATCH 0021/1405] interconnect: qcom: sm8550: Enable sync_state To ensure the interconnect votes are actually meaningful and in order to prevent holding all buses at FMAX, introduce the sync state callback. Fixes: e6f0d6a30f73 ("interconnect: qcom: Add SM8550 interconnect provider driver") Signed-off-by: Konrad Dybcio Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20231218-topic-8550_fixes-v1-2-ce1272d77540@linaro.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8550.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c index 629faa4c9aaee..fc22cecf650fc 100644 --- a/drivers/interconnect/qcom/sm8550.c +++ b/drivers/interconnect/qcom/sm8550.c @@ -2223,6 +2223,7 @@ static struct platform_driver qnoc_driver = { .driver = { .name = "qnoc-sm8550", .of_match_table = qnoc_of_match, + .sync_state = icc_sync_state, }, }; -- GitLab From a9668169961106f3598384fe95004106ec191201 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:34:14 +0300 Subject: [PATCH 0022/1405] HID: hid-steam: remove pointless error message This error message doesn't really add any information. If modprobe fails then the user will already know what the error code is. In the case of kmalloc() it's a style violation to print an error message for that because kmalloc has it's own better error messages built in. Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/305898fb-6bd4-4749-806c-05ec51bbeb80@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index b3c4e50e248aa..59df6ead7b549 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev, return hid_hw_start(hdev, HID_CONNECT_DEFAULT); steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); - if (!steam) { - ret = -ENOMEM; - goto steam_alloc_fail; - } + if (!steam) + return -ENOMEM; + steam->hdev = hdev; hid_set_drvdata(hdev, steam); spin_lock_init(&steam->lock); @@ -1179,9 +1178,6 @@ static int steam_probe(struct hid_device *hdev, cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); -steam_alloc_fail: - hid_err(hdev, "%s: failed with error %d\n", - __func__, ret); return ret; } -- GitLab From a9f1da09c69f13ef471db8b22107a28042d230ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Jan 2024 17:35:06 +0300 Subject: [PATCH 0023/1405] HID: hid-steam: Fix cleanup in probe() There are a number of issues in this code. First of all if steam_create_client_hid() fails then it leads to an error pointer dereference when we call hid_destroy_device(steam->client_hdev). Also there are a number of leaks. hid_hw_stop() is not called if hid_hw_open() fails for example. And it doesn't call steam_unregister() or hid_hw_close(). Fixes: 691ead124a0c ("HID: hid-steam: Clean up locking") Signed-off-by: Dan Carpenter Reviewed-by: Vicki Pfau Link: https://lore.kernel.org/r/1fd87904-dabf-4879-bb89-72d13ebfc91e@moroto.mountain Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-steam.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 59df6ead7b549..b08a5ab585288 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1128,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev, */ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); if (ret) - goto hid_hw_start_fail; + goto err_cancel_work; ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "%s:hid_hw_open\n", __func__); - goto hid_hw_open_fail; + goto err_hw_stop; } if (steam->quirks & STEAM_QUIRK_WIRELESS) { @@ -1151,33 +1151,37 @@ static int steam_probe(struct hid_device *hdev, hid_err(hdev, "%s:steam_register failed with error %d\n", __func__, ret); - goto input_register_fail; + goto err_hw_close; } } steam->client_hdev = steam_create_client_hid(hdev); if (IS_ERR(steam->client_hdev)) { ret = PTR_ERR(steam->client_hdev); - goto client_hdev_fail; + goto err_stream_unregister; } steam->client_hdev->driver_data = steam; ret = hid_add_device(steam->client_hdev); if (ret) - goto client_hdev_add_fail; + goto err_destroy; return 0; -client_hdev_add_fail: - hid_hw_stop(hdev); -client_hdev_fail: +err_destroy: hid_destroy_device(steam->client_hdev); -input_register_fail: -hid_hw_open_fail: -hid_hw_start_fail: +err_stream_unregister: + if (steam->connected) + steam_unregister(steam); +err_hw_close: + hid_hw_close(hdev); +err_hw_stop: + hid_hw_stop(hdev); +err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); + return ret; } -- GitLab From ddb17dc880eeaac37b5a6e984de07b882de7d78d Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 16 Jan 2024 10:32:20 +0300 Subject: [PATCH 0024/1405] fs/ntfs3: Use kvfree to free memory allocated by kvmalloc Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 4 ++-- fs/ntfs3/bitmap.c | 4 ++-- fs/ntfs3/frecord.c | 4 ++-- fs/ntfs3/super.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 48e7da47c6b71..9f4bd8d260901 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -29,7 +29,7 @@ static inline bool al_is_valid_le(const struct ntfs_inode *ni, void al_destroy(struct ntfs_inode *ni) { run_close(&ni->attr_list.run); - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.size = 0; ni->attr_list.dirty = false; @@ -318,7 +318,7 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, memcpy(ptr, al->le, off); memcpy(Add2Ptr(ptr, off + sz), le, old_size - off); le = Add2Ptr(ptr, off); - kfree(al->le); + kvfree(al->le); al->le = ptr; } else { memmove(Add2Ptr(le, sz), le, old_size - off); diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 63f14a0232f6a..845f9b22deef0 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -124,7 +124,7 @@ void wnd_close(struct wnd_bitmap *wnd) { struct rb_node *node, *next; - kfree(wnd->free_bits); + kvfree(wnd->free_bits); wnd->free_bits = NULL; run_close(&wnd->run); @@ -1360,7 +1360,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short)); memset(new_free + wnd->nwnd, 0, (new_wnd - wnd->nwnd) * sizeof(short)); - kfree(wnd->free_bits); + kvfree(wnd->free_bits); wnd->free_bits = new_free; } diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 6ff4f70ba0775..2636ab7640ace 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -778,7 +778,7 @@ static int ni_try_remove_attr_list(struct ntfs_inode *ni) run_deallocate(sbi, &ni->attr_list.run, true); run_close(&ni->attr_list.run); ni->attr_list.size = 0; - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.dirty = false; @@ -927,7 +927,7 @@ int ni_create_attr_list(struct ntfs_inode *ni) return 0; out: - kfree(ni->attr_list.le); + kvfree(ni->attr_list.le); ni->attr_list.le = NULL; ni->attr_list.size = 0; return err; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 65ef4b57411f0..c55a29793a8d8 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -625,7 +625,7 @@ static void ntfs3_free_sbi(struct ntfs_sb_info *sbi) { kfree(sbi->new_rec); kvfree(ntfs_put_shared(sbi->upcase)); - kfree(sbi->def_table); + kvfree(sbi->def_table); kfree(sbi->compress.lznt); #ifdef CONFIG_NTFS3_LZX_XPRESS xpress_free_decompressor(sbi->compress.xpress); -- GitLab From 935024212dafebe065ccb5c4d399f19e4b8dbb82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Tue, 16 Jan 2024 06:27:31 +0000 Subject: [PATCH 0025/1405] dt-bindings: don't anchor DT_SCHEMA_FILES to bindings directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5e3ef4546819 ("dt-bindings: ignore paths outside kernel for DT_SCHEMA_FILES") anchored all searches to the bindings directory (since bindings only exist below that), but it turns out this is not always desired. Just anchor to the base kernel source directory and while at it, break the overly long line for legibility. Reported-by: Michal Simek Fixes: 5e3ef4546819 ("dt-bindings: ignore paths outside kernel for DT_SCHEMA_FILES") Closes: https://lore.kernel.org/all/827695c3-bb33-4a86-8586-2c7323530398@amd.com/ Cc: Masahiro Yamada Signed-off-by: André Draszik Tested-by: Michal Simek Link: https://lore.kernel.org/r/20240116062731.2810067-1-git@andred.net Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 2323fd5b7cdae..129cf698fa8a6 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -28,7 +28,10 @@ $(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \ -name 'processed-schema*' \) -find_cmd = $(find_all_cmd) | sed 's|^$(srctree)/$(src)/||' | grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | sed 's|^|$(srctree)/$(src)/|' +find_cmd = $(find_all_cmd) | \ + sed 's|^$(srctree)/||' | \ + grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | \ + sed 's|^|$(srctree)/|' CHK_DT_DOCS := $(shell $(find_cmd)) quiet_cmd_yamllint = LINT $(src) -- GitLab From 658365c6b0857e6a306436e315a8633937e3af42 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Fri, 12 Jan 2024 12:19:27 +0800 Subject: [PATCH 0026/1405] scsi: isci: Fix an error code problem in isci_io_request_build() Clang static complains that Value stored to 'status' is never read. Return 'status' rather than 'SCI_SUCCESS'. Fixes: f1f52e75939b ("isci: uplevel request infrastructure") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20240112041926.3924315-1-suhui@nfschina.com Reviewed-by: Artur Paszkiewicz Signed-off-by: Martin K. Petersen --- drivers/scsi/isci/request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 71f711cb0628a..355a0bc0828e7 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) -- GitLab From d6b75ba5218915be48542bcd7e2a09776b7c66c9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 16 Jan 2024 12:58:36 +0800 Subject: [PATCH 0027/1405] scsi: virtio_scsi: Remove duplicate check if queue is broken virtqueue_enable_cb() will call virtqueue_poll() which will check if queue is broken at beginning, so remove the virtqueue_is_broken() call Signed-off-by: Li RongQing Link: https://lore.kernel.org/r/20240116045836.12475-1-lirongqing@baidu.com Reviewed-by: Stefan Hajnoczi Acked-by: Michael S. Tsirkin Signed-off-by: Martin K. Petersen --- drivers/scsi/virtio_scsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 9d1bdcdc13312..e15b3809d0595 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -182,8 +182,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi, while ((buf = virtqueue_get_buf(vq, &len)) != NULL) fn(vscsi, buf); - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); } -- GitLab From e6f3799de2f2b2cd23c3894a127921dfb6c6a512 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 16 Jan 2024 11:26:06 +0000 Subject: [PATCH 0028/1405] scsi: initio: Remove redundant variable 'rb' The variable 'rb' is being assigned a value but it isn't being read afterwards. The assignment is redundant and so 'rb' can be removed. Cleans up clang scan build warning: warning: Although the value stored to 'rb' is used in the enclosing expression, the value is never actually read from 'rb'[deadcode.DeadStores] Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240116112606.2263738-1-colin.i.king@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/initio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 2a50fda3a628c..625fd547ee60a 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr) */ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) { - u8 rb; u8 instr; int i; @@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) udelay(30); outb(SE2CS, base + TUL_NVRAM); /* -CLK */ udelay(30); - if ((rb = inb(base + TUL_NVRAM)) & SE2DI) + if (inb(base + TUL_NVRAM) & SE2DI) break; /* write complete */ } outb(0, base + TUL_NVRAM); /* -CS */ -- GitLab From 7d1ae55ffed0ff78ed33b1465c1233e05024e135 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Tue, 16 Jan 2024 13:55:09 -0800 Subject: [PATCH 0029/1405] scsi: MAINTAINERS: Update ibmvscsi_tgt maintainer Michael has not been responsible for this code as an IBMer for quite some time. Seeing as the rest of the IBM Virtual SCSI related drivers already fall under my purview replace Michael with myself as maintainer. Signed-off-by: Tyrel Datwyler Link: https://lore.kernel.org/r/20240116215509.1155787-1-tyreld@linux.ibm.com Signed-off-by: Martin K. Petersen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b5ac8c4ae4345..afbd909258dfe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10203,7 +10203,7 @@ F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Michael Cyr +M: Tyrel Datwyler L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported -- GitLab From 4d695869d3fb952d6f690bec0260ffe2a8b99b37 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 17 Jan 2024 14:27:15 +0100 Subject: [PATCH 0030/1405] selftests/hid: wacom: fix confidence tests The device is exported with a fuzz of 4, meaning that the `+ t` here is removed by the fuzz algorithm, making those tests failing. Not sure why, but when I run this locally it was passing, but not in the VM of the CI. Fixes: b0fb904d074e ("HID: wacom: Add additional tests of confidence behavior") Link: https://gitlab.freedesktop.org/bentiss/hid/-/jobs/53692957#L3315 Acked-by: Jason Gerecke Link: https://lore.kernel.org/r/20240117-b4-wip-wacom-tests-fixes-v1-1-f317784f3c36@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_wacom_generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 352fc39f3c6c1..b62c7dba6777f 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest does not overlap with other contacts. The value of `t` may be incremented over time to move the point along a linear path. """ - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 return test_multitouch.Touch(contact_id, x, y) def make_contacts(self, n, t=0): @@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest tracking_id = contact_ids.tracking_id slot_num = contact_ids.slot_num - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 # If the data isn't supposed to be stored in any slots, there is # nothing we can check for in the evdev stream. -- GitLab From 0991abeddefa118479b0af32c28bcd662dec1561 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Thu, 18 Jan 2024 09:52:52 +0800 Subject: [PATCH 0031/1405] exfat: fix zero the unwritten part for dio read For dio read, bio will be leave in flight when a successful partial aio read have been setup, blockdev_direct_IO() will return -EIOCBQUEUED. In the case, iter->iov_offset will be not advanced, the oops reported by syzbot will occur if revert iter->iov_offset with iov_iter_revert(). The unwritten part had been zeroed by aio read, so there is no need to zero it in dio read. Reported-by: syzbot+fd404f6b03a58e8bc403@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fd404f6b03a58e8bc403 Fixes: 11a347fb6cef ("exfat: change to get file size from DataLength") Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 522edcbb2ce4d..0687f952956c3 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct exfat_inode_info *ei = EXFAT_I(inode); loff_t pos = iocb->ki_pos; - loff_t size = iocb->ki_pos + iov_iter_count(iter); + loff_t size = pos + iov_iter_count(iter); int rw = iov_iter_rw(iter); ssize_t ret; @@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) */ ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); if (ret < 0) { - if (rw == WRITE) + if (rw == WRITE && ret != -EIOCBQUEUED) exfat_write_failed(mapping, size); - if (ret != -EIOCBQUEUED) - return ret; + return ret; } else size = pos + ret; -- GitLab From 55583e899a5357308274601364741a83e78d6ac4 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:33 +0800 Subject: [PATCH 0032/1405] ext4: fix double-free of blocks due to wrong extents moved_len In ext4_move_extents(), moved_len is only updated when all moves are successfully executed, and only discards orig_inode and donor_inode preallocations when moved_len is not zero. When the loop fails to exit after successfully moving some extents, moved_len is not updated and remains at 0, so it does not discard the preallocations. If the moved extents overlap with the preallocated extents, the overlapped extents are freed twice in ext4_mb_release_inode_pa() and ext4_process_freed_data() (as described in commit 94d7c16cbbbd ("ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT")), and bb_free is incremented twice. Hence when trim is executed, a zero-division bug is triggered in mb_update_avg_fragment_size() because bb_free is not zero and bb_fragments is zero. Therefore, update move_len after each extent move to avoid the issue. Reported-by: Wei Chen Reported-by: xingwei lee Closes: https://lore.kernel.org/r/CAO4mrferzqBUnCag8R3m2zf897ts9UEuhjFQGPtODT92rYyR2Q@mail.gmail.com Fixes: fcf6b1b729bc ("ext4: refactor ext4_move_extents code base") CC: # 3.18 Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/move_extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3aa57376d9c2e..391efa6d4c56f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -618,6 +618,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -672,7 +673,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -682,9 +683,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { -- GitLab From 172202152a125955367393956acf5f4ffd092e0d Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:34 +0800 Subject: [PATCH 0033/1405] ext4: do not trim the group with corrupted block bitmap Otherwise operating on an incorrupted block bitmap can lead to all sorts of unknown problems. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f44f668e407f2..c86c90b494de3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6761,6 +6761,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) bool set_trimmed = false; void *bitmap; + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + return 0; + last = ext4_last_grp_cluster(sb, e4b->bd_group); bitmap = e4b->bd_bitmap; if (start == 0 && max >= last) -- GitLab From c9b528c35795b711331ed36dc3dbee90d5812d4e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:35 +0800 Subject: [PATCH 0034/1405] ext4: regenerate buddy after block freeing failed if under fc replay This mostly reverts commit 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") and reintroduces mb_regenerate_buddy(). Based on code in mb_free_blocks(), fast commit replay can end up marking as free blocks that are already marked as such. This causes corruption of the buddy bitmap so we need to regenerate it in that case. Reported-by: Jan Kara Fixes: 6bd97bf273bd ("ext4: remove redundant mb_regenerate_buddy()") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c86c90b494de3..c97ad0e778318 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1233,6 +1233,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, atomic64_add(period, &sbi->s_mb_generation_time); } +static void mb_regenerate_buddy(struct ext4_buddy *e4b) +{ + int count; + int order = 1; + void *buddy; + + while ((buddy = mb_find_buddy(e4b, order++, &count))) + mb_set_bits(buddy, 0, count); + + e4b->bd_info->bb_fragments = 0; + memset(e4b->bd_info->bb_counters, 0, + sizeof(*e4b->bd_info->bb_counters) * + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, + e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); +} + /* The buddy information is attached the buddy cache inode * for convenience. The information regarding each group * is loaded via ext4_mb_load_buddy. The information involve @@ -1920,6 +1938,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ext4_mark_group_bitmap_corrupted( sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); + } else { + mb_regenerate_buddy(e4b); } goto done; } -- GitLab From 2331fd4a49864e1571b4f50aa3aa1536ed6220d0 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:36 +0800 Subject: [PATCH 0035/1405] ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks() After updating bb_free in mb_free_blocks, it is possible to return without updating bb_fragments because the block being freed is found to have already been freed, which leads to inconsistency between bb_free and bb_fragments. Since the group may be unlocked in ext4_grp_locked_error(), this can lead to problems such as dividing by zero when calculating the average fragment length. Hence move the update of bb_free to after the block double-free check guarantees that the corresponding statistics are updated only after the core block bitmap is modified. Fixes: eabe0444df90 ("ext4: speed-up releasing blocks on commit") CC: # 3.10 Suggested-by: Jan Kara Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c97ad0e778318..fa351aa323dcf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1909,11 +1909,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); - this_cpu_inc(discard_pa_seq); - e4b->bd_info->bb_free += count; - if (first < e4b->bd_info->bb_first_free) - e4b->bd_info->bb_first_free = first; - /* access memory sequentially: check left neighbour, * clear range and then check right neighbour */ @@ -1927,23 +1922,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t blocknr; + /* + * Fastcommit replay can free already freed blocks which + * corrupts allocation info. Regenerate it. + */ + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + mb_regenerate_buddy(e4b); + goto check; + } + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - ext4_grp_locked_error(sb, e4b->bd_group, - inode ? inode->i_ino : 0, - blocknr, - "freeing already freed block (bit %u); block bitmap corrupt.", - block); - ext4_mark_group_bitmap_corrupted( - sb, e4b->bd_group, + ext4_grp_locked_error(sb, e4b->bd_group, + inode ? inode->i_ino : 0, blocknr, + "freeing already freed block (bit %u); block bitmap corrupt.", + block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, EXT4_GROUP_INFO_BBITMAP_CORRUPT); - } else { - mb_regenerate_buddy(e4b); - } - goto done; + return; } + this_cpu_inc(discard_pa_seq); + e4b->bd_info->bb_free += count; + if (first < e4b->bd_info->bb_first_free) + e4b->bd_info->bb_first_free = first; + /* let's maintain fragments counter */ if (left_is_free && right_is_free) e4b->bd_info->bb_fragments--; @@ -1968,9 +1971,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (first <= last) mb_buddy_mark_free(e4b, first >> 1, last >> 1); -done: mb_set_largest_free_order(sb, e4b->bd_info); mb_update_avg_fragment_size(sb, e4b->bd_info); +check: mb_check_buddy(e4b); } -- GitLab From 993bf0f4c393b3667830918f9247438a8f6fdb5b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:37 +0800 Subject: [PATCH 0036/1405] ext4: avoid dividing by 0 in mb_update_avg_fragment_size() when block bitmap corrupt Determine if bb_fragments is 0 instead of determining bb_free to eliminate the risk of dividing by zero when the block bitmap is corrupted. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-6-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fa351aa323dcf..751e31df0f104 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -842,7 +842,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) struct ext4_sb_info *sbi = EXT4_SB(sb); int new_order; - if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_fragments == 0) return; new_order = mb_avg_fragment_size_order(sb, -- GitLab From 4530b3660d396a646aad91a787b6ab37cf604b53 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 0037/1405] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 751e31df0f104..82afe275e6c88 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2299,6 +2299,9 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, return; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -2306,6 +2309,7 @@ void ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); } -- GitLab From 832698373a25950942c04a512daa652c18a9b513 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 0038/1405] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 82afe275e6c88..2069d768c6198 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2336,12 +2336,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -2374,6 +2372,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- GitLab From c5f3a3821de42ede9bcaeb892d1539717cf590cb Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:40 +0800 Subject: [PATCH 0039/1405] ext4: mark the group block bitmap as corrupted before reporting an error Otherwise unlocking the group in ext4_grp_locked_error may allow other processes to modify the core block bitmap that is known to be corrupt. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-9-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 2069d768c6198..c7f63dd7afccb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -564,14 +564,14 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(EXT4_SB(sb), first + i); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing block already freed " "(bit %u)", first + i); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); } mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); } @@ -1933,12 +1933,12 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += EXT4_C2B(sbi, block); + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, inode ? inode->i_ino : 0, blocknr, "freeing already freed block (bit %u); block bitmap corrupt.", block); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); return; } @@ -2406,12 +2406,12 @@ void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, k = mb_find_next_zero_bit(buddy, max, 0); if (k >= max) { + ext4_mark_group_bitmap_corrupted(ac->ac_sb, + e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(ac->ac_sb, e4b->bd_group, 0, 0, "%d free clusters of order %d. But found 0", grp->bb_counters[i], i); - ext4_mark_group_bitmap_corrupted(ac->ac_sb, - e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } ac->ac_found++; @@ -2462,12 +2462,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, * free blocks even though group info says we * have free blocks */ + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But bitmap says 0", free); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); break; } @@ -2493,12 +2493,12 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, if (WARN_ON(ex.fe_len <= 0)) break; if (free < ex.fe_len) { + ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, "%d free clusters as per " "group info. But got %d blocks", free, ex.fe_len); - ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group, - EXT4_GROUP_INFO_BBITMAP_CORRUPT); /* * The number of free blocks differs. This mostly * indicate that the bitmap is corrupt. So exit -- GitLab From 133de5a0d8f8e32b34feaa8beae7a189482f1856 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:54 +0800 Subject: [PATCH 0040/1405] ext4: remove unused return value of __mb_check_buddy Remove unused return value of __mb_check_buddy. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-2-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c7f63dd7afccb..0d0917cf2e8fc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -677,7 +677,7 @@ do { \ } \ } while (0) -static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, +static void __mb_check_buddy(struct ext4_buddy *e4b, char *file, const char *function, int line) { struct super_block *sb = e4b->bd_sb; @@ -696,7 +696,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy2; if (e4b->bd_info->bb_check_counter++ % 10) - return 0; + return; while (order > 1) { buddy = mb_find_buddy(e4b, order, &max); @@ -758,7 +758,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, grp = ext4_get_group_info(sb, e4b->bd_group); if (!grp) - return NULL; + return; list_for_each(cur, &grp->bb_prealloc_list) { ext4_group_t groupnr; struct ext4_prealloc_space *pa; @@ -768,7 +768,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (i = 0; i < pa->pa_len; i++) MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); } - return 0; } #undef MB_CHECK_ASSERT #define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ -- GitLab From 438a35e72d09c01da7ffb49570ecd11ca632270b Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:55 +0800 Subject: [PATCH 0041/1405] ext4: remove unused parameter ngroup in ext4_mb_choose_next_group_*() Remove unused parameter ngroup in ext4_mb_choose_next_group_*(). Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20240105092102.496631-3-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0d0917cf2e8fc..9c867a13bf8db 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -870,7 +870,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) * cr level needs an update. */ static void ext4_mb_choose_next_group_p2_aligned(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *iter; @@ -944,7 +944,7 @@ ext4_mb_find_good_group_avg_frag_lists(struct ext4_allocation_context *ac, int o * order. Updates *new_cr if cr level needs an update. */ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -989,7 +989,7 @@ static void ext4_mb_choose_next_group_goal_fast(struct ext4_allocation_context * * much and fall to CR_GOAL_LEN_SLOW in that case. */ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context *ac, - enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups) + enum criteria *new_cr, ext4_group_t *group) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_group_info *grp = NULL; @@ -1124,11 +1124,11 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, } if (*new_cr == CR_POWER2_ALIGNED) { - ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_p2_aligned(ac, new_cr, group); } else if (*new_cr == CR_GOAL_LEN_FAST) { - ext4_mb_choose_next_group_goal_fast(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_goal_fast(ac, new_cr, group); } else if (*new_cr == CR_BEST_AVAIL_LEN) { - ext4_mb_choose_next_group_best_avail(ac, new_cr, group, ngroups); + ext4_mb_choose_next_group_best_avail(ac, new_cr, group); } else { /* * TODO: For CR=2, we can arrange groups in an rb tree sorted by -- GitLab From 11fd1a5d642355a45f4008d308399c26a8b3d3f0 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:56 +0800 Subject: [PATCH 0042/1405] ext4: remove unneeded return value of ext4_mb_release_context Function ext4_mb_release_context always return 0 and the return value is never used. Just remove unneeded return value of ext4_mb_release_context. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-4-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9c867a13bf8db..dfd2de7aa0cfc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5968,7 +5968,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* * release all resource we used in allocation */ -static int ext4_mb_release_context(struct ext4_allocation_context *ac) +static void ext4_mb_release_context(struct ext4_allocation_context *ac) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_prealloc_space *pa = ac->ac_pa; @@ -6005,7 +6005,6 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); - return 0; } static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) -- GitLab From 97c32dbffce12136c06d9ceb6919850233d3a1c2 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:57 +0800 Subject: [PATCH 0043/1405] ext4: remove unused ext4_allocation_context::ac_groups_considered Remove unused ext4_allocation_context::ac_groups_considered Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-5-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index d7aeb5da7d867..56938532b4ce2 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -192,7 +192,6 @@ struct ext4_allocation_context { */ ext4_grpblk_t ac_orig_goal_len; - __u32 ac_groups_considered; __u32 ac_flags; /* allocation hints */ __u16 ac_groups_scanned; __u16 ac_groups_linear_remaining; -- GitLab From 908177175a2ac7280cac738f33ccbbbcff035c5c Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:58 +0800 Subject: [PATCH 0044/1405] ext4: remove unused return value of ext4_mb_release Remove unused return value of ext4_mb_release. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-6-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/mballoc.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a5d784872303d..cabce11778fc1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2912,7 +2912,7 @@ extern const struct seq_operations ext4_mb_seq_groups_ops; extern const struct seq_operations ext4_mb_seq_structs_summary_ops; extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); extern int ext4_mb_init(struct super_block *); -extern int ext4_mb_release(struct super_block *); +extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern void ext4_discard_preallocations(struct inode *, unsigned int); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index dfd2de7aa0cfc..26b199d2c3300 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3750,7 +3750,7 @@ static int ext4_mb_cleanup_pa(struct ext4_group_info *grp) return count; } -int ext4_mb_release(struct super_block *sb) +void ext4_mb_release(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; @@ -3826,8 +3826,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - - return 0; } static inline int ext4_issue_discard(struct super_block *sb, -- GitLab From 820c280896eaf715b39ac1ad38976bcc749082b7 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:20:59 +0800 Subject: [PATCH 0045/1405] ext4: remove unused return value of ext4_mb_release_inode_pa Remove unused return value of ext4_mb_release_inode_pa Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-7-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 26b199d2c3300..3349723ff93fb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5307,7 +5307,7 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac) * the caller MUST hold group/inode locks. * TODO: optimize the case when there are no in-core structures yet */ -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, struct ext4_prealloc_space *pa) { @@ -5357,8 +5357,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, */ } atomic_add(free, &sbi->s_mb_discarded); - - return 0; } static noinline_for_stack int -- GitLab From 20427949b9b584422c05bb31e48b1b68615dd794 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:00 +0800 Subject: [PATCH 0046/1405] ext4: remove unused return value of ext4_mb_release_group_pa Remove unused return value of ext4_mb_release_group_pa. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-8-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3349723ff93fb..a8e61bb181fa4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5359,7 +5359,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, atomic_add(free, &sbi->s_mb_discarded); } -static noinline_for_stack int +static noinline_for_stack void ext4_mb_release_group_pa(struct ext4_buddy *e4b, struct ext4_prealloc_space *pa) { @@ -5373,13 +5373,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", e4b->bd_group, group, pa->pa_pstart); - return 0; + return; } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); - - return 0; } /* -- GitLab From 2ffd2a6ad1d3a8213bb5805f45f49098fe615db1 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:01 +0800 Subject: [PATCH 0047/1405] ext4: remove unnecessary parameter "needed" in ext4_discard_preallocations The "needed" controls the number of ext4_prealloc_space to discard in ext4_discard_preallocations. Function ext4_discard_preallocations is supposed to discard all non-used preallocated blocks when "needed" is 0 and now ext4_discard_preallocations is always called with "needed" = 0. Remove unnecessary parameter "needed" and remove all non-used preallocated spaces in ext4_discard_preallocations to simplify the code. Note: If count of non-used preallocated spaces could be more than UINT_MAX, there was a memory leak as some non-used preallocated spaces are left ununsed and this commit will fix it. Otherwise, there is no behavior change. Signed-off-by: Kemeng Shi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-9-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 10 +++++----- fs/ext4/file.c | 2 +- fs/ext4/indirect.c | 2 +- fs/ext4/inode.c | 6 +++--- fs/ext4/ioctl.c | 2 +- fs/ext4/mballoc.c | 10 +++------- fs/ext4/move_extent.c | 4 ++-- fs/ext4/super.c | 2 +- 9 files changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cabce11778fc1..786b6857ab474 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2915,7 +2915,7 @@ extern int ext4_mb_init(struct super_block *); extern void ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); -extern void ext4_discard_preallocations(struct inode *, unsigned int); +extern void ext4_discard_preallocations(struct inode *); extern int __init ext4_init_mballoc(void); extern void ext4_exit_mballoc(void); extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 01299b55a567a..9106715254ac0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; @@ -4313,7 +4313,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * not a good idea to call discard here directly, * but otherwise we'd need to call it every free(). */ - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE; ext4_free_blocks(handle, inode, NULL, newblock, @@ -5357,7 +5357,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); @@ -5365,7 +5365,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); goto out_stop; } - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ret = ext4_ext_shift_extents(inode, handle, punch_stop, punch_stop - punch_start, SHIFT_LEFT); @@ -5497,7 +5497,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); path = ext4_find_extent(inode, offset_lblk, NULL, 0); if (IS_ERR(path)) { diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6aa15dafc6778..54d6ff22585cf 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -174,7 +174,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp) (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index a9f3716119d37..d8ca7f64f9523 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -714,7 +714,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * i_rwsem. So we can safely drop the i_data_sem here. */ BUG_ON(EXT4_JOURNAL(inode) == NULL); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); *dropped = 1; return 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5af1b0b8680e9..4cae8698f70cf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -371,7 +371,7 @@ void ext4_da_update_reserve_space(struct inode *inode, */ if ((ei->i_reserved_data_blocks == 0) && !inode_is_open_for_write(inode)) - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); } static int __check_block_validity(struct inode *inode, const char *func, @@ -4017,7 +4017,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (stop_block > first_block) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, first_block, stop_block - first_block); @@ -4170,7 +4170,7 @@ int ext4_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) err = ext4_ext_truncate(handle, inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index aa6be510eb8f5..7160a71044c88 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -467,7 +467,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_reset_inode_seed(inode); ext4_reset_inode_seed(inode_bl); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); err = ext4_mark_inode_dirty(handle, inode); if (err < 0) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a8e61bb181fa4..f3da7db2beee5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5498,7 +5498,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_discard_preallocations(struct inode *inode, unsigned int needed) +void ext4_discard_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -5520,15 +5520,12 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), needed); - - if (needed == 0) - needed = UINT_MAX; + atomic_read(&ei->i_prealloc_active), 0); repeat: /* first, collect all pa's in the inode */ write_lock(&ei->i_prealloc_lock); - for (iter = rb_first(&ei->i_prealloc_node); iter && needed; + for (iter = rb_first(&ei->i_prealloc_node); iter; iter = rb_next(iter)) { pa = rb_entry(iter, struct ext4_prealloc_space, pa_node.inode_node); @@ -5552,7 +5549,6 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed) spin_unlock(&pa->pa_lock); rb_erase(&pa->pa_node.inode_node, &ei->i_prealloc_node); list_add(&pa->u.pa_tmp_list, &list); - needed--; continue; } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 391efa6d4c56f..7cd4afa4de1d3 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, out: if (*moved_len) { - ext4_discard_preallocations(orig_inode, 0); - ext4_discard_preallocations(donor_inode, 0); + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); } ext4_free_ext_path(path); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dcba0f85dfe24..0f931d0c227da 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1525,7 +1525,7 @@ void ext4_clear_inode(struct inode *inode) ext4_fc_del(inode); invalidate_inode_buffers(inode); clear_inode(inode); - ext4_discard_preallocations(inode, 0); + ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); dquot_drop(inode); if (EXT4_I(inode)->jinode) { -- GitLab From f0e54b6087de9571ec61c189d6c378b81edbe3b2 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Fri, 5 Jan 2024 17:21:02 +0800 Subject: [PATCH 0048/1405] ext4: remove 'needed' in trace_ext4_discard_preallocations As 'needed' to trace_ext4_discard_preallocations is always 0 which is meaningless. Just remove it. Signed-off-by: Kemeng Shi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240105092102.496631-10-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 5 ++--- include/trace/events/ext4.h | 11 ++++------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f3da7db2beee5..e4f7cf9d89c45 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5510,9 +5510,8 @@ void ext4_discard_preallocations(struct inode *inode) struct rb_node *iter; int err; - if (!S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode)) return; - } if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) return; @@ -5520,7 +5519,7 @@ void ext4_discard_preallocations(struct inode *inode) mb_debug(sb, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode, - atomic_read(&ei->i_prealloc_active), 0); + atomic_read(&ei->i_prealloc_active)); repeat: /* first, collect all pa's in the inode */ diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 65029dfb92fbc..a697f4b77162d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -772,15 +772,14 @@ TRACE_EVENT(ext4_mb_release_group_pa, ); TRACE_EVENT(ext4_discard_preallocations, - TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed), + TP_PROTO(struct inode *inode, unsigned int len), - TP_ARGS(inode, len, needed), + TP_ARGS(inode, len), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( unsigned int, len ) - __field( unsigned int, needed ) ), @@ -788,13 +787,11 @@ TRACE_EVENT(ext4_discard_preallocations, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->len = len; - __entry->needed = needed; ), - TP_printk("dev %d,%d ino %lu len: %u needed %u", + TP_printk("dev %d,%d ino %lu len: %u", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->len, - __entry->needed) + (unsigned long) __entry->ino, __entry->len) ); TRACE_EVENT(ext4_mb_discard_preallocations, -- GitLab From 367188297254e7f81e3c3c94e6d6a623f757c4cb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 16:11:09 +0530 Subject: [PATCH 0049/1405] RISC-V: KVM: Allow Zbc extension for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zbc extension for Guest/VM. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/vcpu_onereg.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index d6b7a5b958742..bbff9c7e8f3f8 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -139,6 +139,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZIHPM, KVM_RISCV_ISA_EXT_SMSTATEEN, KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_ZBC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index fc34557f5356e..4522fdfec94ef 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -42,6 +42,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBC), KVM_ISA_EXT_ARR(ZBS), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), @@ -92,6 +93,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: + case KVM_RISCV_ISA_EXT_ZBC: case KVM_RISCV_ISA_EXT_ZBS: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: -- GitLab From ac396141308d07a9534c5a7f1f7c80cb95e35b20 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 18:47:23 +0530 Subject: [PATCH 0050/1405] KVM: riscv: selftests: Add Zbc extension to get-reg-list test The KVM RISC-V allows Zbc extension for Guest/VM so let us add this extension to get-reg-list test. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6652108816db4..2e8e1c52b9efb 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -49,6 +49,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: @@ -394,6 +395,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBC), KVM_ISA_EXT_ARR(ZBS), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), @@ -888,6 +890,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); @@ -914,6 +917,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svpbmt, &config_zba, &config_zbb, + &config_zbc, &config_zbs, &config_zicbom, &config_zicboz, -- GitLab From f370b4e668f017f523968f7490163fa922dcd92e Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 20:36:36 +0530 Subject: [PATCH 0051/1405] RISC-V: KVM: Allow scalar crypto extensions for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable scalar crypto extensions for Guest/VM. This includes extensions Zbkb, Zbkc, Zbkx, Zknd, Zkne, Zknh, Zkr, Zksed, Zksh, and Zkt. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 10 ++++++++++ arch/riscv/kvm/vcpu_onereg.c | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index bbff9c7e8f3f8..453edf620b87b 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -140,6 +140,16 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SMSTATEEN, KVM_RISCV_ISA_EXT_ZICOND, KVM_RISCV_ISA_EXT_ZBC, + KVM_RISCV_ISA_EXT_ZBKB, + KVM_RISCV_ISA_EXT_ZBKC, + KVM_RISCV_ISA_EXT_ZBKX, + KVM_RISCV_ISA_EXT_ZKND, + KVM_RISCV_ISA_EXT_ZKNE, + KVM_RISCV_ISA_EXT_ZKNH, + KVM_RISCV_ISA_EXT_ZKR, + KVM_RISCV_ISA_EXT_ZKSED, + KVM_RISCV_ISA_EXT_ZKSH, + KVM_RISCV_ISA_EXT_ZKT, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 4522fdfec94ef..680da2a55da25 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -43,6 +43,9 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), + KVM_ISA_EXT_ARR(ZBKB), + KVM_ISA_EXT_ARR(ZBKC), + KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), @@ -52,6 +55,13 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZIFENCEI), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZKND), + KVM_ISA_EXT_ARR(ZKNE), + KVM_ISA_EXT_ARR(ZKNH), + KVM_ISA_EXT_ARR(ZKR), + KVM_ISA_EXT_ARR(ZKSED), + KVM_ISA_EXT_ARR(ZKSH), + KVM_ISA_EXT_ARR(ZKT), }; static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) @@ -94,6 +104,9 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: case KVM_RISCV_ISA_EXT_ZBC: + case KVM_RISCV_ISA_EXT_ZBKB: + case KVM_RISCV_ISA_EXT_ZBKC: + case KVM_RISCV_ISA_EXT_ZBKX: case KVM_RISCV_ISA_EXT_ZBS: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: @@ -101,6 +114,13 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZIFENCEI: case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_RISCV_ISA_EXT_ZKND: + case KVM_RISCV_ISA_EXT_ZKNE: + case KVM_RISCV_ISA_EXT_ZKNH: + case KVM_RISCV_ISA_EXT_ZKR: + case KVM_RISCV_ISA_EXT_ZKSED: + case KVM_RISCV_ISA_EXT_ZKSH: + case KVM_RISCV_ISA_EXT_ZKT: return false; /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: -- GitLab From 14d70de562dfd78be638fc59b0a323235acc67be Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 20:47:58 +0530 Subject: [PATCH 0052/1405] KVM: riscv: selftests: Add scaler crypto extensions to get-reg-list test The KVM RISC-V allows scaler crypto extensions for Guest/VM so let us add these extensions to get-reg-list test. This includes extensions Zbkb, Zbkc, Zbkx, Zknd, Zkne, Zknh, Zkr, Zksed, Zksh, and Zkt. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- .../selftests/kvm/riscv/get-reg-list.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 2e8e1c52b9efb..3f5674fbd6806 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -50,6 +50,9 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: @@ -59,6 +62,13 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT: /* * Like ISA_EXT registers, SBI_EXT registers are only visible when the * host supports them and disabling them does not affect the visibility @@ -396,6 +406,9 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), + KVM_ISA_EXT_ARR(ZBKB), + KVM_ISA_EXT_ARR(ZBKC), + KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), @@ -405,6 +418,13 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZIFENCEI), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZKND), + KVM_ISA_EXT_ARR(ZKNE), + KVM_ISA_EXT_ARR(ZKNH), + KVM_ISA_EXT_ARR(ZKR), + KVM_ISA_EXT_ARR(ZKSED), + KVM_ISA_EXT_ARR(ZKSH), + KVM_ISA_EXT_ARR(ZKT), }; if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) @@ -891,6 +911,9 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); @@ -900,6 +923,13 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); +KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); +KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR); +KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_base, @@ -918,6 +948,9 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zba, &config_zbb, &config_zbc, + &config_zbkb, + &config_zbkc, + &config_zbkx, &config_zbs, &config_zicbom, &config_zicboz, @@ -927,5 +960,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zifencei, &config_zihintpause, &config_zihpm, + &config_zknd, + &config_zkne, + &config_zknh, + &config_zkr, + &config_zksed, + &config_zksh, + &config_zkt, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); -- GitLab From afd1ef3adfbc36e35fcf4f742fd90aea6480a276 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 21:38:43 +0530 Subject: [PATCH 0053/1405] RISC-V: KVM: Allow vector crypto extensions for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable vector crypto extensions for Guest/VM. This includes extensions Zvbb, Zvbc, Zvkb, Zvkg, Zvkned, Zvknha, Zvknhb, Zvksed, Zvksh, and Zvkt. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 10 ++++++++++ arch/riscv/kvm/vcpu_onereg.c | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 453edf620b87b..e68ba0819ef75 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -150,6 +150,16 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZKSED, KVM_RISCV_ISA_EXT_ZKSH, KVM_RISCV_ISA_EXT_ZKT, + KVM_RISCV_ISA_EXT_ZVBB, + KVM_RISCV_ISA_EXT_ZVBC, + KVM_RISCV_ISA_EXT_ZVKB, + KVM_RISCV_ISA_EXT_ZVKG, + KVM_RISCV_ISA_EXT_ZVKNED, + KVM_RISCV_ISA_EXT_ZVKNHA, + KVM_RISCV_ISA_EXT_ZVKNHB, + KVM_RISCV_ISA_EXT_ZVKSED, + KVM_RISCV_ISA_EXT_ZVKSH, + KVM_RISCV_ISA_EXT_ZVKT, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 680da2a55da25..297acdcfed775 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -62,6 +62,16 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZKSED), KVM_ISA_EXT_ARR(ZKSH), KVM_ISA_EXT_ARR(ZKT), + KVM_ISA_EXT_ARR(ZVBB), + KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVKB), + KVM_ISA_EXT_ARR(ZVKG), + KVM_ISA_EXT_ARR(ZVKNED), + KVM_ISA_EXT_ARR(ZVKNHA), + KVM_ISA_EXT_ARR(ZVKNHB), + KVM_ISA_EXT_ARR(ZVKSED), + KVM_ISA_EXT_ARR(ZVKSH), + KVM_ISA_EXT_ARR(ZVKT), }; static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) @@ -121,6 +131,16 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZKSED: case KVM_RISCV_ISA_EXT_ZKSH: case KVM_RISCV_ISA_EXT_ZKT: + case KVM_RISCV_ISA_EXT_ZVBB: + case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVKB: + case KVM_RISCV_ISA_EXT_ZVKG: + case KVM_RISCV_ISA_EXT_ZVKNED: + case KVM_RISCV_ISA_EXT_ZVKNHA: + case KVM_RISCV_ISA_EXT_ZVKNHB: + case KVM_RISCV_ISA_EXT_ZVKSED: + case KVM_RISCV_ISA_EXT_ZVKSH: + case KVM_RISCV_ISA_EXT_ZVKT: return false; /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: -- GitLab From 2ddf79070f7edada19fecec57d8591d6b718fa53 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 21:54:33 +0530 Subject: [PATCH 0054/1405] KVM: riscv: selftests: Add vector crypto extensions to get-reg-list test The KVM RISC-V allows vector crypto extensions for Guest/VM so let us add these extensions to get-reg-list test. This includes extensions Zvbb, Zvbc, Zvkb, Zvkg, Zvkned, Zvknha, Zvknhb, Zvksed, Zvksh, and Zvkt. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- .../selftests/kvm/riscv/get-reg-list.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 3f5674fbd6806..6b4ba06e26e57 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -69,6 +69,16 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT: /* * Like ISA_EXT registers, SBI_EXT registers are only visible when the * host supports them and disabling them does not affect the visibility @@ -425,6 +435,16 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZKSED), KVM_ISA_EXT_ARR(ZKSH), KVM_ISA_EXT_ARR(ZKT), + KVM_ISA_EXT_ARR(ZVBB), + KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVKB), + KVM_ISA_EXT_ARR(ZVKG), + KVM_ISA_EXT_ARR(ZVKNED), + KVM_ISA_EXT_ARR(ZVKNHA), + KVM_ISA_EXT_ARR(ZVKNHB), + KVM_ISA_EXT_ARR(ZVKSED), + KVM_ISA_EXT_ARR(ZVKSH), + KVM_ISA_EXT_ARR(ZVKT), }; if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) @@ -930,6 +950,16 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR); KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED); KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH); KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT); struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_base, @@ -967,5 +997,15 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zksed, &config_zksh, &config_zkt, + &config_zvbb, + &config_zvbc, + &config_zvkb, + &config_zvkg, + &config_zvkned, + &config_zvknha, + &config_zvknhb, + &config_zvksed, + &config_zvksh, + &config_zvkt, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); -- GitLab From f3901ece5b3894177d1816208d0fb06b295617e0 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:01:55 +0530 Subject: [PATCH 0055/1405] RISC-V: KVM: Allow Zfh[min] extensions for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zfh[min] extensions for Guest/VM. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu_onereg.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index e68ba0819ef75..a8411ae5cc85c 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -160,6 +160,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZVKSED, KVM_RISCV_ISA_EXT_ZVKSH, KVM_RISCV_ISA_EXT_ZVKT, + KVM_RISCV_ISA_EXT_ZFH, + KVM_RISCV_ISA_EXT_ZFHMIN, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 297acdcfed775..e00745bf05905 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -47,6 +47,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZFH), + KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICNTR), @@ -118,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZBKC: case KVM_RISCV_ISA_EXT_ZBKX: case KVM_RISCV_ISA_EXT_ZBS: + case KVM_RISCV_ISA_EXT_ZFH: + case KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: case KVM_RISCV_ISA_EXT_ZICSR: -- GitLab From 496ee21a17ce45e92483fdf1827ba91f4867f160 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:11:02 +0530 Subject: [PATCH 0056/1405] KVM: riscv: selftests: Add Zfh[min] extensions to get-reg-list test The KVM RISC-V allows Zfh[min] extensions for Guest/VM so let us add these extensions to get-reg-list test. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6b4ba06e26e57..eeb9857feede1 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -54,6 +54,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: @@ -420,6 +422,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZFH), + KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICNTR), @@ -935,6 +939,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); @@ -982,6 +988,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zbkc, &config_zbkx, &config_zbs, + &config_zfh, + &config_zfhmin, &config_zicbom, &config_zicboz, &config_zicntr, -- GitLab From ab6da9cdc3f3d1d091d657219fb6e98f710ee098 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:15:10 +0530 Subject: [PATCH 0057/1405] RISC-V: KVM: Allow Zihintntl extension for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zihintntl extension for Guest/VM. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/vcpu_onereg.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index a8411ae5cc85c..95e4d5a1793e7 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -162,6 +162,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZVKT, KVM_RISCV_ISA_EXT_ZFH, KVM_RISCV_ISA_EXT_ZFHMIN, + KVM_RISCV_ISA_EXT_ZIHINTNTL, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index e00745bf05905..deceaa6f9cfa2 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -55,6 +55,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), KVM_ISA_EXT_ARR(ZKND), @@ -126,6 +127,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZICOND: case KVM_RISCV_ISA_EXT_ZICSR: case KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_RISCV_ISA_EXT_ZIHPM: case KVM_RISCV_ISA_EXT_ZKND: -- GitLab From 1a3bc507821d24a80a6af8beb08af8032c33ebd7 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:22:06 +0530 Subject: [PATCH 0058/1405] KVM: riscv: selftests: Add Zihintntl extension to get-reg-list test The KVM RISC-V allows Zihintntl extension for Guest/VM so let us add this extension to get-reg-list test. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index eeb9857feede1..c9a45e17267bf 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -62,6 +62,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: @@ -430,6 +431,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), KVM_ISA_EXT_ARR(ZKND), @@ -947,6 +949,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); +KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); @@ -996,6 +999,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zicond, &config_zicsr, &config_zifencei, + &config_zihintntl, &config_zihintpause, &config_zihpm, &config_zknd, -- GitLab From f46300285926c2b0d0c79bf40c87d45e169cecb6 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:26:05 +0530 Subject: [PATCH 0059/1405] RISC-V: KVM: Allow Zvfh[min] extensions for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zvfh[min] extensions for Guest/VM. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu_onereg.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 95e4d5a1793e7..7d07722aa1aaf 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -163,6 +163,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFH, KVM_RISCV_ISA_EXT_ZFHMIN, KVM_RISCV_ISA_EXT_ZIHINTNTL, + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index deceaa6f9cfa2..707d9d9883c85 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -67,6 +67,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZKT), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFH), + KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), KVM_ISA_EXT_ARR(ZVKG), KVM_ISA_EXT_ARR(ZVKNED), @@ -139,6 +141,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZKT: case KVM_RISCV_ISA_EXT_ZVBB: case KVM_RISCV_ISA_EXT_ZVBC: + case KVM_RISCV_ISA_EXT_ZVFH: + case KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_RISCV_ISA_EXT_ZVKB: case KVM_RISCV_ISA_EXT_ZVKG: case KVM_RISCV_ISA_EXT_ZVKNED: -- GitLab From 1216fdd99be113fa75ccdd0497802bd0fe4369aa Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:35:06 +0530 Subject: [PATCH 0060/1405] KVM: riscv: selftests: Add Zvfh[min] extensions to get-reg-list test The KVM RISC-V allows Zvfh[min] extensions for Guest/VM so let us add these extensions to get-reg-list test. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index c9a45e17267bf..02ec1a44624a8 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -74,6 +74,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED: @@ -443,6 +445,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZKT), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFH), + KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), KVM_ISA_EXT_ARR(ZVKG), KVM_ISA_EXT_ARR(ZVKNED), @@ -961,6 +965,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH); KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG); KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED); @@ -1011,6 +1017,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zkt, &config_zvbb, &config_zvbc, + &config_zvfh, + &config_zvfhmin, &config_zvkb, &config_zvkg, &config_zvkned, -- GitLab From 41182cc6f507011a2e6c82657779e451ed9942bb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:43:12 +0530 Subject: [PATCH 0061/1405] RISC-V: KVM: Allow Zfa extension for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow KVM user space to detect and enable Zfa extension for Guest/VM. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 1 + arch/riscv/kvm/vcpu_onereg.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 7d07722aa1aaf..7499e88a947c0 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -165,6 +165,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZIHINTNTL, KVM_RISCV_ISA_EXT_ZVFH, KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 707d9d9883c85..5f7355e960084 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -47,6 +47,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), @@ -123,6 +124,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZBKC: case KVM_RISCV_ISA_EXT_ZBKX: case KVM_RISCV_ISA_EXT_ZBS: + case KVM_RISCV_ISA_EXT_ZFA: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_RISCV_ISA_EXT_ZICNTR: -- GitLab From 4d0e8f9a361b3a1f7b67418c536b258323de734f Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 27 Nov 2023 22:45:35 +0530 Subject: [PATCH 0062/1405] KVM: riscv: selftests: Add Zfa extension to get-reg-list test The KVM RISC-V allows Zfa extension for Guest/VM so let us add this extension to get-reg-list test. Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 02ec1a44624a8..4fd0f89515744 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -54,6 +54,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: @@ -425,6 +426,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), @@ -945,6 +947,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); @@ -997,6 +1000,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zbkc, &config_zbkx, &config_zbs, + &config_zfa, &config_zfh, &config_zfhmin, &config_zicbom, -- GitLab From 180a8f12c21f41740fee09ca7f7aa98ff5bb99f8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 23 Dec 2023 15:16:50 +0100 Subject: [PATCH 0063/1405] Input: goodix - accept ACPI resources with gpio_count == 3 && gpio_int_idx == 0 Some devices list 3 Gpio resources in the ACPI resource list for the touchscreen: 1. GpioInt resource pointing to the GPIO used for the interrupt 2. GpioIo resource pointing to the reset GPIO 3. GpioIo resource pointing to the GPIO used for the interrupt Note how the third extra GpioIo resource really is a duplicate of the GpioInt provided info. Ignore this extra GPIO, treating this setup the same as gpio_count == 2 && gpio_int_idx == 0 fixes the touchscreen not working on the Thunderbook Colossus W803 rugged tablet and likely also on the CyberBook_T116K. Reported-by: Maarten van der Schrieck Closes: https://gitlab.com/AdyaAdya/goodix-touchscreen-linux-driver/-/issues/22 Suggested-by: Maarten van der Schrieck Tested-by: Maarten van der Schrieck Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20231223141650.10679-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index af32fbe57b630..b068ff8afbc9a 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -884,7 +884,8 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) } } - if (ts->gpio_count == 2 && ts->gpio_int_idx == 0) { + /* Some devices with gpio_int_idx 0 list a third unused GPIO */ + if ((ts->gpio_count == 2 || ts->gpio_count == 3) && ts->gpio_int_idx == 0) { ts->irq_pin_access_method = IRQ_PIN_ACCESS_ACPI_GPIO; gpio_mapping = acpi_goodix_int_first_gpios; } else if (ts->gpio_count == 2 && ts->gpio_int_idx == 1) { -- GitLab From e4cec073b7755a78030f30cf627141c759035b50 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 20 Jan 2024 23:00:21 -0800 Subject: [PATCH 0064/1405] dmaengine: at_hdmac: fix some kernel-doc warnings Fix some kernel-doc format warnings: at_hdmac.c:243: warning: Excess struct member 'sg_len' description in 'at_desc' at_hdmac.c:252: warning: cannot understand function prototype: 'enum atc_status ' ez at_hdmac.c:351: warning: Excess struct member 'atdma_devtype' description in 'at_dma' at_hdmac.c:351: warning: Excess struct member 'ch_regs' description in 'at_dma' at_hdmac.c:664: warning: contents before sections Signed-off-by: Randy Dunlap Cc: Ludovic Desroches Cc: Tudor Ambarus Cc: linux-arm-kernel@lists.infradead.org Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Link: https://lore.kernel.org/r/20240121070021.25365-1-rdunlap@infradead.org Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fb89ecbf0cc5b..6bad536e04924 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -222,7 +222,7 @@ struct atdma_sg { * @vd: pointer to the virtual dma descriptor. * @atchan: pointer to the atmel dma channel. * @total_len: total transaction byte count - * @sg_len: number of sg entries. + * @sglen: number of sg entries. * @sg: array of sgs. */ struct at_desc { @@ -245,7 +245,7 @@ struct at_desc { /*-- Channels --------------------------------------------------------*/ /** - * atc_status - information bits stored in channel status flag + * enum atc_status - information bits stored in channel status flag * * Manipulated with atomic operations. */ @@ -328,8 +328,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) /** * struct at_dma - internal representation of an Atmel HDMA Controller * @dma_device: dmaengine dma_device object members - * @atdma_devtype: identifier of DMA controller compatibility - * @ch_regs: memory mapped register base + * @regs: memory mapped register base * @clk: dma controller clock * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask @@ -626,6 +625,9 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) /** * atc_get_llis_residue - Get residue for a hardware linked list transfer + * @atchan: pointer to an atmel hdmac channel. + * @desc: pointer to the descriptor for which the residue is calculated. + * @residue: residue to be set to dma_tx_state. * * Calculate the residue by removing the length of the Linked List Item (LLI) * already transferred from the total length. To get the current LLI we can use @@ -661,10 +663,8 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla) * two DSCR values are different, we read again the CTRLA then the DSCR till two * consecutive read values from DSCR are equal or till the maximum trials is * reach. This algorithm is very unlikely not to find a stable value for DSCR. - * @atchan: pointer to an atmel hdmac channel. - * @desc: pointer to the descriptor for which the residue is calculated. - * @residue: residue to be set to dma_tx_state. - * Returns 0 on success, -errno otherwise. + * + * Returns: %0 on success, -errno otherwise. */ static int atc_get_llis_residue(struct at_dma_chan *atchan, struct at_desc *desc, u32 *residue) @@ -731,7 +731,8 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan, * @chan: DMA channel * @cookie: transaction identifier to check status of * @residue: residue to be updated. - * Return 0 on success, -errono otherwise. + * + * Return: %0 on success, -errno otherwise. */ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie, u32 *residue) @@ -1710,7 +1711,7 @@ static void atc_issue_pending(struct dma_chan *chan) * atc_alloc_chan_resources - allocate resources for DMA channel * @chan: allocate descriptor resources for this channel * - * return - the number of allocated descriptors + * Return: the number of allocated descriptors */ static int atc_alloc_chan_resources(struct dma_chan *chan) { -- GitLab From b73e43dcd7a8be26880ef8ff336053b29e79dbc5 Mon Sep 17 00:00:00 2001 From: Guanhua Gao Date: Thu, 18 Jan 2024 11:29:16 -0500 Subject: [PATCH 0065/1405] dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools In case of long format of qDMA command descriptor, there are one frame descriptor, three entries in the frame list and two data entries. So the size of dma_pool_create for these three fields should be the same with the total size of entries respectively, or the contents may be overwritten by the next allocated descriptor. Fixes: 7fdf9b05c73b ("dmaengine: fsl-dpaa2-qdma: Add NXP dpaa2 qDMA controller driver for Layerscape SoCs") Signed-off-by: Guanhua Gao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240118162917.2951450-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 7958ac33e36ce..5a8061a307cda 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -38,15 +38,17 @@ static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan) if (!dpaa2_chan->fd_pool) goto err; - dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev, - sizeof(struct dpaa2_fl_entry), - sizeof(struct dpaa2_fl_entry), 0); + dpaa2_chan->fl_pool = + dma_pool_create("fl_pool", dev, + sizeof(struct dpaa2_fl_entry) * 3, + sizeof(struct dpaa2_fl_entry), 0); + if (!dpaa2_chan->fl_pool) goto err_fd; dpaa2_chan->sdd_pool = dma_pool_create("sdd_pool", dev, - sizeof(struct dpaa2_qdma_sd_d), + sizeof(struct dpaa2_qdma_sd_d) * 2, sizeof(struct dpaa2_qdma_sd_d), 0); if (!dpaa2_chan->sdd_pool) goto err_fl; -- GitLab From 1755c4b0372a2cf1e7124956b8cfebcb51083208 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 9 Jan 2024 11:49:06 +0000 Subject: [PATCH 0066/1405] dt-bindings: clock: gs101: rename cmu_misc clock-names 'bus' and 'ip' are sufficient because naming is local to the module. As the bindings have not made a release yet, rename the cmu_misc clock-names. Fixes: 0a910f160638 ("dt-bindings: clock: Add Google gs101 clock management unit bindings") Suggested-by: Rob Herring Signed-off-by: Tudor Ambarus Reviewed-by: Sam Protsenko Acked-by: Rob Herring Link: https://lore.kernel.org/r/20240109114908.3623645-2-tudor.ambarus@linaro.org Signed-off-by: Krzysztof Kozlowski --- .../devicetree/bindings/clock/google,gs101-clock.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml index 3eebc03a309be..ca7fdada3ff24 100644 --- a/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml +++ b/Documentation/devicetree/bindings/clock/google,gs101-clock.yaml @@ -85,8 +85,8 @@ allOf: clock-names: items: - - const: dout_cmu_misc_bus - - const: dout_cmu_misc_sss + - const: bus + - const: sss additionalProperties: false -- GitLab From d76c762e7ee04af79e1c127422e0bbcb5f123018 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 9 Jan 2024 11:49:08 +0000 Subject: [PATCH 0067/1405] clk: samsung: clk-gs101: comply with the new dt cmu_misc clock names The cmu_misc clock-names were renamed to just "bus" and "sss" because naming is local to the module, so cmu_misc is implied. As the bindings and the device tree have not made a release yet, comply with the renamed clocks. Suggested-by: Rob Herring Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20240109114908.3623645-4-tudor.ambarus@linaro.org Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-gs101.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/samsung/clk-gs101.c b/drivers/clk/samsung/clk-gs101.c index 0964bb11657f1..782993951fff8 100644 --- a/drivers/clk/samsung/clk-gs101.c +++ b/drivers/clk/samsung/clk-gs101.c @@ -2475,7 +2475,7 @@ static const struct samsung_cmu_info misc_cmu_info __initconst = { .nr_clk_ids = CLKS_NR_MISC, .clk_regs = misc_clk_regs, .nr_clk_regs = ARRAY_SIZE(misc_clk_regs), - .clk_name = "dout_cmu_misc_bus", + .clk_name = "bus", }; /* ---- platform_driver ----------------------------------------------------- */ -- GitLab From 6e2276203ac9ff10fc76917ec9813c660f627369 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Thu, 18 Jan 2024 11:19:29 +0800 Subject: [PATCH 0068/1405] dmaengine: ti: edma: Add some null pointer checks to the edma_probe devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Link: https://lore.kernel.org/r/20240118031929.192192-1-chentao@kylinos.cn Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index f1f920861fa9d..5f8d2e93ff3fb 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2404,6 +2404,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, ecc); if (ret) { @@ -2420,6 +2425,11 @@ static int edma_probe(struct platform_device *pdev) if (irq > 0) { irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", dev_name(dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err_disable_pm; + } + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, ecc); if (ret) { -- GitLab From bc9847c9ba134cfe3398011e343dcf6588c1c902 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Wed, 3 Jan 2024 14:37:55 +0530 Subject: [PATCH 0069/1405] dmaengine: ti: k3-udma: Report short packet errors Propagate the TR response status to the device using BCDMA split-channels. For example CSI-RX driver should be able to check if a frame was not transferred completely (short packet) and needs to be discarded. Fixes: 25dcb5dd7b7c ("dmaengine: ti: New driver for K3 UDMA") Signed-off-by: Jai Luthra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240103-tr_resp_err-v1-1-2fdf6d48ab92@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 2841a539c2648..6400d06588a24 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -3968,6 +3968,7 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, { struct udma_chan *uc = to_udma_chan(&vc->chan); struct udma_desc *d; + u8 status; if (!vd) return; @@ -3977,12 +3978,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, if (d->metadata_size) udma_fetch_epib(uc, d); - /* Provide residue information for the client */ if (result) { void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx); if (cppi5_desc_get_type(desc_vaddr) == CPPI5_INFO0_DESC_TYPE_VAL_HOST) { + /* Provide residue information for the client */ result->residue = d->residue - cppi5_hdesc_get_pktlen(desc_vaddr); if (result->residue) @@ -3991,7 +3992,12 @@ static void udma_desc_pre_callback(struct virt_dma_chan *vc, result->result = DMA_TRANS_NOERROR; } else { result->residue = 0; - result->result = DMA_TRANS_NOERROR; + /* Propagate TR Response errors to the client */ + status = d->hwdesc[0].tr_resp_base->status; + if (status) + result->result = DMA_TRANS_ABORTED; + else + result->result = DMA_TRANS_NOERROR; } } } -- GitLab From 968bc1d7203d384e72afe34124a1801b7af76514 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:03 +0100 Subject: [PATCH 0070/1405] dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA This dma_alloc_coherent() is undone in the remove function, but not in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix the issue in the probe and simplify the remove function. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/a0ef5d0f5a47381617ef339df776ddc68ce48173.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index a1d0aa63142a9..28c4e86bbcd2e 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -563,11 +563,11 @@ static struct fsl_qdma_queue /* * Buffer for queue command */ - status_head->cq = dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - status_size, - &status_head->bus_addr, - GFP_KERNEL); + status_head->cq = dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + status_size, + &status_head->bus_addr, + GFP_KERNEL); if (!status_head->cq) { devm_kfree(&pdev->dev, status_head); return NULL; @@ -1268,8 +1268,6 @@ static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev) static void fsl_qdma_remove(struct platform_device *pdev) { - int i; - struct fsl_qdma_queue *status; struct device_node *np = pdev->dev.of_node; struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev); @@ -1277,12 +1275,6 @@ static void fsl_qdma_remove(struct platform_device *pdev) fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev); of_dma_controller_free(np); dma_async_device_unregister(&fsl_qdma->dma_dev); - - for (i = 0; i < fsl_qdma->block_number; i++) { - status = fsl_qdma->status[i]; - dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) * - status->n_cq, status->cq, status->bus_addr); - } } static const struct of_device_id fsl_qdma_dt_ids[] = { -- GitLab From 3aa58cb51318e329d203857f7a191678e60bb714 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:04 +0100 Subject: [PATCH 0071/1405] dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA This dma_alloc_coherent() is undone neither in the remove function, nor in the error handling path of fsl_qdma_probe(). Switch to the managed version to fix both issues. Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/7f66aa14f59d32b13672dde28602b47deb294e1f.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 28c4e86bbcd2e..9b141369bea5d 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -514,11 +514,11 @@ static struct fsl_qdma_queue queue_temp = queue_head + i + (j * queue_num); queue_temp->cq = - dma_alloc_coherent(&pdev->dev, - sizeof(struct fsl_qdma_format) * - queue_size[i], - &queue_temp->bus_addr, - GFP_KERNEL); + dmam_alloc_coherent(&pdev->dev, + sizeof(struct fsl_qdma_format) * + queue_size[i], + &queue_temp->bus_addr, + GFP_KERNEL); if (!queue_temp->cq) return NULL; queue_temp->block_base = fsl_qdma->block_base + -- GitLab From 0650006a93a2ce3b57f86e7f000347d9ae7737ef Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 7 Jan 2024 11:02:05 +0100 Subject: [PATCH 0072/1405] dmaengine: fsl-qdma: Remove a useless devm_kfree() 'status_head' is a managed resource. It will be freed automatically if fsl_qdma_prep_status_queue(), and so fsl_qdma_probe(), fails. Remove the redundant (and harmless) devm_kfree() call. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6b7f60aa2b92f73b35c586886daffc1a5ac58697.1704621515.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 9b141369bea5d..f405c77060ad8 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -568,10 +568,9 @@ static struct fsl_qdma_queue status_size, &status_head->bus_addr, GFP_KERNEL); - if (!status_head->cq) { - devm_kfree(&pdev->dev, status_head); + if (!status_head->cq) return NULL; - } + status_head->n_cq = status_size; status_head->virt_head = status_head->cq; status_head->virt_tail = status_head->cq; -- GitLab From 1513664f340289cf10402753110f3cff12a738aa Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Mon, 22 Jan 2024 15:48:24 +0800 Subject: [PATCH 0073/1405] ALSA: hda/realtek: fix mute/micmute LEDs for HP ZBook Power The HP ZBook Power using ALC236 codec which using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20240122074826.1020964-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f6f16622f9cc7..19f2eb26659d8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9957,6 +9957,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 741ba0134fa7822fcf4e4a0a537a5c4cfd706b20 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 27 Dec 2023 16:21:24 +0100 Subject: [PATCH 0074/1405] pmdomain: core: Move the unused cleanup to a _sync initcall The unused clock cleanup uses the _sync initcall to give all users at earlier initcalls time to probe. Do the same to avoid leaving some PDs dangling at "on" (which actually happened on qcom!). Fixes: 2fe71dcdfd10 ("PM / domains: Add late_initcall to disable unused PM domains") Signed-off-by: Konrad Dybcio Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231227-topic-pmdomain_sync_cleanup-v1-1-5f36769d538b@linaro.org Signed-off-by: Ulf Hansson --- drivers/pmdomain/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index a1f6cba3ae6c8..18e232b5ed53d 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -1109,7 +1109,7 @@ static int __init genpd_power_off_unused(void) return 0; } -late_initcall(genpd_power_off_unused); +late_initcall_sync(genpd_power_off_unused); #ifdef CONFIG_PM_SLEEP -- GitLab From f0e4a1356466ec1858ae8e5c70bea2ce5e55008b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 12 Jan 2024 17:33:55 +0100 Subject: [PATCH 0075/1405] pmdomain: renesas: r8a77980-sysc: CR7 must be always on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The power domain containing the Cortex-R7 CPU core on the R-Car V3H SoC must always be in power-on state, unlike on other SoCs in the R-Car Gen3 family. See Table 9.4 "Power domains" in the R-Car Series, 3rd Generation Hardware User’s Manual Rev.1.00 and later. Fix this by marking the domain as a CPU domain without control registers, so the driver will not touch it. Fixes: 41d6d8bd8ae9 ("soc: renesas: rcar-sysc: add R8A77980 support") Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/fdad9a86132d53ecddf72b734dac406915c4edc0.1705076735.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson --- drivers/pmdomain/renesas/r8a77980-sysc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pmdomain/renesas/r8a77980-sysc.c b/drivers/pmdomain/renesas/r8a77980-sysc.c index 39ca84a67daad..621e411fc9991 100644 --- a/drivers/pmdomain/renesas/r8a77980-sysc.c +++ b/drivers/pmdomain/renesas/r8a77980-sysc.c @@ -25,7 +25,8 @@ static const struct rcar_sysc_area r8a77980_areas[] __initconst = { PD_CPU_NOCR }, { "ca53-cpu3", 0x200, 3, R8A77980_PD_CA53_CPU3, R8A77980_PD_CA53_SCU, PD_CPU_NOCR }, - { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON }, + { "cr7", 0x240, 0, R8A77980_PD_CR7, R8A77980_PD_ALWAYS_ON, + PD_CPU_NOCR }, { "a3ir", 0x180, 0, R8A77980_PD_A3IR, R8A77980_PD_ALWAYS_ON }, { "a2ir0", 0x400, 0, R8A77980_PD_A2IR0, R8A77980_PD_A3IR }, { "a2ir1", 0x400, 1, R8A77980_PD_A2IR1, R8A77980_PD_A3IR }, -- GitLab From ed1a72fb0d646c983c85b62144fb1d134a8edb71 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:22 +0300 Subject: [PATCH 0076/1405] kunit: Fix a NULL vs IS_ERR() bug The kunit_device_register() function doesn't return NULL, it returns error pointers. Change the KUNIT_ASSERT_NOT_NULL() to check for ERR_OR_NULL(). Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/kunit-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index c4259d910356b..f7980ef236a38 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test) long action_was_run = 0; test_device = kunit_device_register(test, "my_device"); - KUNIT_ASSERT_NOT_NULL(test, test_device); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device); /* Add an action to verify cleanup. */ devm_add_action(test_device, test_dev_action, &action_was_run); -- GitLab From 083974ebb8fc65978d6cacd1bcfe9158d6234b98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 21:55:14 +0300 Subject: [PATCH 0077/1405] kunit: device: Fix a NULL vs IS_ERR() check in init() The root_device_register() function does not return NULL, it returns error pointers. Fix the check to match. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: Dan Carpenter Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kunit/device.c b/lib/kunit/device.c index f5371287b3750..074c6dd2e36a7 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -45,8 +45,8 @@ int kunit_bus_init(void) int error; kunit_bus_device = root_device_register("kunit"); - if (!kunit_bus_device) - return -ENOMEM; + if (IS_ERR(kunit_bus_device)) + return PTR_ERR(kunit_bus_device); error = bus_register(&kunit_bus_type); if (error) -- GitLab From 57e39086fb868a84f772cf097004f4715d8aaccb Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 12 Jan 2024 07:49:47 +0800 Subject: [PATCH 0078/1405] MAINTAINERS: kunit: Add Rae Moar as a reviewer Rae has been shouldering a lot of the KUnit review burden for the last year, and will continue to do so in the future. Thanks! Signed-off-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Shuah Khan --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..354d993bc2cf8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11724,6 +11724,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins M: David Gow +R: Rae Moar L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained -- GitLab From a1af6a2bfa0cb46d70b7df5352993e750da6c79b Mon Sep 17 00:00:00 2001 From: Marco Pagani Date: Wed, 10 Jan 2024 16:59:47 +0100 Subject: [PATCH 0079/1405] kunit: run test suites only after module initialization completes Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed a wild-memory-access bug that could have happened during the loading phase of test suites built and executed as loadable modules. However, it also introduced a problematic side effect that causes test suites modules to crash when they attempt to register fake devices. When a module is loaded, it traverses the MODULE_STATE_UNFORMED and MODULE_STATE_COMING states before reaching the normal operating state MODULE_STATE_LIVE. Finally, when the module is removed, it moves to MODULE_STATE_GOING before being released. However, if the loading function load_module() fails between complete_formation() and do_init_module(), the module goes directly from MODULE_STATE_COMING to MODULE_STATE_GOING without passing through MODULE_STATE_LIVE. This behavior was causing kunit_module_exit() to be called without having first executed kunit_module_init(). Since kunit_module_exit() is responsible for freeing the memory allocated by kunit_module_init() through kunit_filter_suites(), this behavior was resulting in a wild-memory-access bug. Commit 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") fixed this issue by running the tests when the module is still in MODULE_STATE_COMING. However, modules in that state are not fully initialized, lacking sysfs kobjects. Therefore, if a test module attempts to register a fake device, it will inevitably crash. This patch proposes a different approach to fix the original wild-memory-access bug while restoring the normal module execution flow by making kunit_module_exit() able to detect if kunit_module_init() has previously initialized the tests suite set. In this way, test modules can once again register fake devices without crashing. This behavior is achieved by checking whether mod->kunit_suites is a virtual or direct mapping address. If it is a virtual address, then kunit_module_init() has allocated the suite_set in kunit_filter_suites() using kmalloc_array(). On the contrary, if mod->kunit_suites is still pointing to the original address that was set when looking up the .kunit_test_suites section of the module, then the loading phase has failed and there's no memory to be freed. v4: - rebased on 6.8 - noted that kunit_filter_suites() must return a virtual address v3: - add a comment to clarify why the start address is checked v2: - add include Fixes: 2810c1e99867 ("kunit: Fix wild-memory-access bug in kunit_free_suite_set()") Reviewed-by: David Gow Tested-by: Rae Moar Tested-by: Richard Fitzgerald Reviewed-by: Javier Martinez Canillas Signed-off-by: Marco Pagani Signed-off-by: Shuah Khan --- lib/kunit/executor.c | 4 ++++ lib/kunit/test.c | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 717b9599036ba..689fff2b2b106 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set) kfree(suite_set.start); } +/* + * Filter and reallocate test suites. Must return the filtered test suites set + * allocated at a valid virtual address or NULL in case of error. + */ struct kunit_suite_set kunit_filter_suites(const struct kunit_suite_set *suite_set, const char *filter_glob, diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f95d2093a0aa3..31a5a992e6467 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "debugfs.h" #include "device-impl.h" @@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod) }; const char *action = kunit_action(); + /* + * Check if the start address is a valid virtual address to detect + * if the module load sequence has failed and the suite set has not + * been initialized and filtered. + */ + if (!suite_set.start || !virt_addr_valid(suite_set.start)) + return; + if (!action) __kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites); - if (suite_set.start) - kunit_free_suite_set(suite_set); + kunit_free_suite_set(suite_set); } static int kunit_module_notify(struct notifier_block *nb, unsigned long val, @@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val, switch (val) { case MODULE_STATE_LIVE: + kunit_module_init(mod); break; case MODULE_STATE_GOING: kunit_module_exit(mod); break; case MODULE_STATE_COMING: - kunit_module_init(mod); break; case MODULE_STATE_UNFORMED: break; -- GitLab From 1a9f2c776d1416c4ea6cb0d0b9917778c41a1a7d Mon Sep 17 00:00:00 2001 From: Arthur Grillo Date: Wed, 10 Jan 2024 14:39:28 -0300 Subject: [PATCH 0080/1405] Documentation: KUnit: Update the instructions on how to test static functions Now that we have the VISIBLE_IF_KUNIT and EXPORT_SYMBOL_IF_KUNIT macros, update the instructions to recommend this way of testing static functions. Signed-off-by: Arthur Grillo Reviewed-by: David Gow Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index a9efab50eed83..22955d56b3799 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -671,8 +671,23 @@ Testing Static Functions ------------------------ If we do not want to expose functions or variables for testing, one option is to -conditionally ``#include`` the test file at the end of your .c file. For -example: +conditionally export the used symbol. For example: + +.. code-block:: c + + /* In my_file.c */ + + VISIBLE_IF_KUNIT int do_interesting_thing(); + EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing); + + /* In my_file.h */ + + #if IS_ENABLED(CONFIG_KUNIT) + int do_interesting_thing(void); + #endif + +Alternatively, you could conditionally ``#include`` the test file at the end of +your .c file. For example: .. code-block:: c -- GitLab From 523d242d4309797e6b27c708fbd1463f301c199a Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:36 +0800 Subject: [PATCH 0081/1405] ASoC: codecs: ES8326: improving crosstalk performance We change the crosstalk parameter in es8326_resume function to improve crosstalk performance. Adding crosstalk kcontrol to enhance the flexibility of crosstalk debugging in machine. Adding ES8326_DAC_CROSSTALK macro to declare the crosstalk register. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-2-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 82 +++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/es8326.h | 1 + 2 files changed, 83 insertions(+) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index fa890f6205e2a..82d1c4f8324c8 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -45,6 +45,82 @@ struct es8326_priv { int jack_remove_retry; }; +static int es8326_crosstalk1_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h); + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h &= 0x20; + crosstalk_l &= 0xf0; + crosstalk = crosstalk_h >> 1 | crosstalk_l >> 4; + ucontrol->value.integer.value[0] = crosstalk; + + return 0; +} + +static int es8326_crosstalk1_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + crosstalk = ucontrol->value.integer.value[0]; + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h = (crosstalk & 0x10) << 1; + crosstalk_l &= 0x0f; + crosstalk_l |= (crosstalk & 0x0f) << 4; + regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE, + 0x20, crosstalk_h); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l); + + return 0; +} + +static int es8326_crosstalk2_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + regmap_read(es8326->regmap, ES8326_DAC_RAMPRATE, &crosstalk_h); + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h &= 0x10; + crosstalk_l &= 0x0f; + crosstalk = crosstalk_h | crosstalk_l; + ucontrol->value.integer.value[0] = crosstalk; + + return 0; +} + +static int es8326_crosstalk2_set(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct es8326_priv *es8326 = snd_soc_component_get_drvdata(component); + unsigned int crosstalk_h, crosstalk_l; + unsigned int crosstalk; + + crosstalk = ucontrol->value.integer.value[0]; + regmap_read(es8326->regmap, ES8326_DAC_CROSSTALK, &crosstalk_l); + crosstalk_h = crosstalk & 0x10; + crosstalk_l &= 0xf0; + crosstalk_l |= crosstalk & 0x0f; + regmap_update_bits(es8326->regmap, ES8326_DAC_RAMPRATE, + 0x10, crosstalk_h); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, crosstalk_l); + + return 0; +} + static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9550, 50, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9550, 50, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_analog_pga_tlv, 0, 300, 0); @@ -102,6 +178,10 @@ static const struct snd_kcontrol_new es8326_snd_controls[] = { SOC_SINGLE_TLV("ALC Capture Target Level", ES8326_ALC_LEVEL, 0, 0x0f, 0, drc_target_tlv), + SOC_SINGLE_EXT("CROSSTALK1", SND_SOC_NOPM, 0, 31, 0, + es8326_crosstalk1_get, es8326_crosstalk1_set), + SOC_SINGLE_EXT("CROSSTALK2", SND_SOC_NOPM, 0, 31, 0, + es8326_crosstalk2_get, es8326_crosstalk2_set), }; static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { @@ -844,6 +924,8 @@ static int es8326_resume(struct snd_soc_component *component) regmap_write(es8326->regmap, ES8326_CLK_CAL_TIME, 0x00); /* calibrate for B version */ es8326_calibrate(component); + regmap_write(es8326->regmap, ES8326_DAC_CROSSTALK, 0xaa); + regmap_write(es8326->regmap, ES8326_DAC_RAMPRATE, 0x00); /* turn off headphone out */ regmap_write(es8326->regmap, ES8326_HP_CAL, 0x00); /* set ADC and DAC in low power mode */ diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h index 90a08351d6acd..dfef808673f4a 100644 --- a/sound/soc/codecs/es8326.h +++ b/sound/soc/codecs/es8326.h @@ -72,6 +72,7 @@ #define ES8326_DAC_VOL 0x50 #define ES8326_DRC_RECOVERY 0x53 #define ES8326_DRC_WINSIZE 0x54 +#define ES8326_DAC_CROSSTALK 0x55 #define ES8326_HPJACK_TIMER 0x56 #define ES8326_HPDET_TYPE 0x57 #define ES8326_INT_SOURCE 0x58 -- GitLab From 14a0a1ec3335ac3945a96437c35465e4a9616b88 Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:37 +0800 Subject: [PATCH 0082/1405] ASoC: codecs: ES8326: Improving the THD+N performance We update the values of some registers in the initialization sequence in es8326_resume function to improve THD+N performance. THD+N performance decreases if the output level on headphone is close to full scale. So we change the register setting in es8326_jack_detect_handler function to improve THD+N performance if headphone pulgged. Also, the register setting should be restored when the headset is unplugged Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-3-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 21 +++++++++++++-------- sound/soc/codecs/es8326.h | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 82d1c4f8324c8..10157a4bd500a 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -752,6 +752,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326->hp = 0; } regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03); /* * Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event */ @@ -777,6 +779,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); + regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x08); queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(400)); es8326->hp = 1; @@ -846,14 +850,14 @@ static int es8326_calibrate(struct snd_soc_component *component) if ((es8326->version == ES8326_VERSION_B) && (es8326->calibrated == false)) { dev_dbg(component->dev, "ES8326_VERSION_B, calibrating\n"); regmap_write(es8326->regmap, ES8326_CLK_INV, 0xc0); - regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x01); + regmap_write(es8326->regmap, ES8326_CLK_DIV1, 0x03); regmap_write(es8326->regmap, ES8326_CLK_DLL, 0x30); regmap_write(es8326->regmap, ES8326_CLK_MUX, 0xed); regmap_write(es8326->regmap, ES8326_CLK_DAC_SEL, 0x08); regmap_write(es8326->regmap, ES8326_CLK_TRI, 0xc1); regmap_write(es8326->regmap, ES8326_DAC_MUTE, 0x03); regmap_write(es8326->regmap, ES8326_ANA_VSEL, 0x7f); - regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x03); + regmap_write(es8326->regmap, ES8326_VMIDLOW, 0x23); regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x88); usleep_range(15000, 20000); regmap_write(es8326->regmap, ES8326_HP_OFFSET_CAL, 0x8c); @@ -894,13 +898,13 @@ static int es8326_resume(struct snd_soc_component *component) /* reset internal clock state */ regmap_write(es8326->regmap, ES8326_RESET, 0x1f); regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); + regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0); usleep_range(10000, 15000); regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9); - regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0x4b); + regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb); /* set headphone default type and detect pin */ regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83); regmap_write(es8326->regmap, ES8326_CLK_RESAMPLE, 0x05); - regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30); /* set internal oscillator as clock source of headpone cp */ regmap_write(es8326->regmap, ES8326_CLK_DIV_CPC, 0x89); @@ -908,14 +912,15 @@ static int es8326_resume(struct snd_soc_component *component) /* clock manager reset release */ regmap_write(es8326->regmap, ES8326_RESET, 0x17); /* set headphone detection as half scan mode */ - regmap_write(es8326->regmap, ES8326_HP_MISC, 0x30); + regmap_write(es8326->regmap, ES8326_HP_MISC, 0x3d); regmap_write(es8326->regmap, ES8326_PULLUP_CTL, 0x00); /* enable headphone driver */ + regmap_write(es8326->regmap, ES8326_HP_VOL, 0xc4); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa7); usleep_range(2000, 5000); - regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xa3); - regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0xb3); + regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x23); + regmap_write(es8326->regmap, ES8326_HP_DRIVER_REF, 0x33); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); regmap_write(es8326->regmap, ES8326_CLK_INV, 0x00); @@ -946,7 +951,7 @@ static int es8326_resume(struct snd_soc_component *component) (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); regmap_write(es8326->regmap, ES8326_SDINOUT23_IO, ES8326_IO_INPUT); - regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b); + regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); regmap_write(es8326->regmap, ES8326_RESET, ES8326_CSM_ON); regmap_update_bits(es8326->regmap, ES8326_PGAGAIN, ES8326_MIC_SEL_MASK, ES8326_MIC1_SEL); diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h index dfef808673f4a..4234bbb900c45 100644 --- a/sound/soc/codecs/es8326.h +++ b/sound/soc/codecs/es8326.h @@ -101,7 +101,7 @@ #define ES8326_MUTE (3 << 0) /* ES8326_CLK_CTL */ -#define ES8326_CLK_ON (0x7f << 0) +#define ES8326_CLK_ON (0x7e << 0) #define ES8326_CLK_OFF (0 << 0) /* ES8326_CLK_INV */ -- GitLab From a3aa9255d6ccb1bff13c7c98e5d3bf10ba67f92e Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:39 +0800 Subject: [PATCH 0083/1405] ASoC: codecs: ES8326: Minimize the pop noise on headphone We modify the register settings to minimize headphone pop noise during ES8326 power-up and music start/stop. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-5-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 10157a4bd500a..1ed068c417d60 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -523,7 +523,8 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); - regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xf0); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, + 0x30, 0x00); } else { if (!es8326->calibrated) { regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL); @@ -536,8 +537,13 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r); es8326->calibrated = true; } + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); - regmap_write(es8326->regmap, ES8326_HP_VOL, 0x91); regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ~(ES8326_MUTE)); @@ -557,23 +563,20 @@ static int es8326_set_bias_level(struct snd_soc_component *codec, if (ret) return ret; - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x02); + usleep_range(5000, 10000); regmap_write(es8326->regmap, ES8326_INTOUT_IO, es8326->interrupt_clk); regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, (ES8326_IO_DMIC_CLK << ES8326_SDINOUT1_SHIFT)); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E); regmap_write(es8326->regmap, ES8326_PGA_PDN, 0x40); regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x00); regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x20); - - regmap_update_bits(es8326->regmap, ES8326_RESET, - ES8326_CSM_ON, ES8326_CSM_ON); + regmap_update_bits(es8326->regmap, ES8326_RESET, 0x02, 0x00); break; case SND_SOC_BIAS_PREPARE: break; case SND_SOC_BIAS_STANDBY: regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b); - regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x00); regmap_update_bits(es8326->regmap, ES8326_CLK_CTL, 0x20, 0x00); regmap_write(es8326->regmap, ES8326_SDINOUT1_IO, ES8326_IO_INPUT); break; @@ -777,6 +780,7 @@ static void es8326_jack_detect_handler(struct work_struct *work) * Don't report jack status. */ regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01); + es8326_enable_micbias(es8326->component); usleep_range(50000, 70000); regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x1f); @@ -820,13 +824,10 @@ static void es8326_jack_detect_handler(struct work_struct *work) static irqreturn_t es8326_irq(int irq, void *dev_id) { struct es8326_priv *es8326 = dev_id; - struct snd_soc_component *comp = es8326->component; if (!es8326->jack) goto out; - es8326_enable_micbias(comp); - if (es8326->jack->status & SND_JACK_HEADSET) queue_delayed_work(system_wq, &es8326->jack_detect_work, msecs_to_jiffies(10)); @@ -943,6 +944,14 @@ static int es8326_resume(struct snd_soc_component *component) regmap_write(es8326->regmap, ES8326_DAC_DSM, 0x08); regmap_write(es8326->regmap, ES8326_DAC_VPPSCALE, 0x15); + regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 | + ((es8326->version == ES8326_VERSION_B) ? + (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) : + (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04))); + usleep_range(5000, 10000); + es8326_enable_micbias(es8326->component); + usleep_range(50000, 70000); + regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00); regmap_write(es8326->regmap, ES8326_INT_SOURCE, (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON)); regmap_write(es8326->regmap, ES8326_INTOUT_IO, @@ -959,11 +968,6 @@ static int es8326_resume(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); - regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x80 | - ((es8326->version == ES8326_VERSION_B) ? - (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol) : - (ES8326_HP_DET_SRC_PIN9 | es8326->jack_pol | 0x04))); - regmap_write(es8326->regmap, ES8326_HP_VOL, 0x11); es8326->jack_remove_retry = 0; es8326->hp = 0; -- GitLab From 8c99a0a607b5e0cf6b79b283d7bb2c2b84e01da5 Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Sat, 20 Jan 2024 18:12:40 +0800 Subject: [PATCH 0084/1405] ASoC: codecs: ES8326: fix the capture noise issue We get a noise issue during the startup of recording. We update the register setting and dapm widgets to fix this issue. we change callback type of es8326_mute function to mute_stream. ES8326_ADC_MUTE is moved to es8326_mute function so it can be turned on at last and turned off at first. Signed-off-by: Zhu Ning Link: https://msgid.link/r/20240120101240.12496-6-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 63 ++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index 1ed068c417d60..cbcd02ec6ba42 100755 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -197,12 +197,6 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { SND_SOC_DAPM_AIF_OUT("I2S OUT", "I2S1 Capture", 0, SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_AIF_IN("I2S IN", "I2S1 Playback", 0, SND_SOC_NOPM, 0, 0), - /* ADC Digital Mute */ - SND_SOC_DAPM_PGA("ADC L1", ES8326_ADC_MUTE, 0, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC R1", ES8326_ADC_MUTE, 1, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC L2", ES8326_ADC_MUTE, 2, 1, NULL, 0), - SND_SOC_DAPM_PGA("ADC R2", ES8326_ADC_MUTE, 3, 1, NULL, 0), - /* Analog Power Supply*/ SND_SOC_DAPM_DAC("Right DAC", NULL, ES8326_ANA_PDN, 0, 1), SND_SOC_DAPM_DAC("Left DAC", NULL, ES8326_ANA_PDN, 1, 1), @@ -222,15 +216,10 @@ static const struct snd_soc_dapm_widget es8326_dapm_widgets[] = { }; static const struct snd_soc_dapm_route es8326_dapm_routes[] = { - {"ADC L1", NULL, "MIC1"}, - {"ADC R1", NULL, "MIC2"}, - {"ADC L2", NULL, "MIC3"}, - {"ADC R2", NULL, "MIC4"}, - - {"ADC L", NULL, "ADC L1"}, - {"ADC R", NULL, "ADC R1"}, - {"ADC L", NULL, "ADC L2"}, - {"ADC R", NULL, "ADC R2"}, + {"ADC L", NULL, "MIC1"}, + {"ADC R", NULL, "MIC2"}, + {"ADC L", NULL, "MIC3"}, + {"ADC R", NULL, "MIC4"}, {"I2S OUT", NULL, "ADC L"}, {"I2S OUT", NULL, "ADC R"}, @@ -520,11 +509,16 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) unsigned int offset_l, offset_r; if (mute) { - regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); - regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, - ES8326_MUTE_MASK, ES8326_MUTE); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, - 0x30, 0x00); + if (direction == SNDRV_PCM_STREAM_PLAYBACK) { + regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_OFF); + regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, + ES8326_MUTE_MASK, ES8326_MUTE); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, + 0x30, 0x00); + } else { + regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE, + 0x0F, 0x0F); + } } else { if (!es8326->calibrated) { regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_FORCE_CAL); @@ -537,16 +531,22 @@ static int es8326_mute(struct snd_soc_dai *dai, int mute, int direction) regmap_write(es8326->regmap, ES8326_HPR_OFFSET_INI, offset_r); es8326->calibrated = true; } - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); - usleep_range(1000, 5000); - regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); - usleep_range(1000, 5000); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); - regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); - regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); - regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); - regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, - ES8326_MUTE_MASK, ~(ES8326_MUTE)); + if (direction == SNDRV_PCM_STREAM_PLAYBACK) { + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x01); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_DAC_DSM, 0x01, 0x00); + usleep_range(1000, 5000); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x20); + regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x30, 0x30); + regmap_write(es8326->regmap, ES8326_HP_DRIVER, 0xa1); + regmap_write(es8326->regmap, ES8326_HP_CAL, ES8326_HP_ON); + regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, + ES8326_MUTE_MASK, ~(ES8326_MUTE)); + } else { + msleep(300); + regmap_update_bits(es8326->regmap, ES8326_ADC_MUTE, + 0x0F, 0x00); + } } return 0; } @@ -596,7 +596,7 @@ static const struct snd_soc_dai_ops es8326_ops = { .set_fmt = es8326_set_dai_fmt, .set_sysclk = es8326_set_dai_sysclk, .mute_stream = es8326_mute, - .no_capture_mute = 1, + .no_capture_mute = 0, }; static struct snd_soc_dai_driver es8326_dai = { @@ -968,6 +968,7 @@ static int es8326_resume(struct snd_soc_component *component) regmap_update_bits(es8326->regmap, ES8326_DAC_MUTE, ES8326_MUTE_MASK, ES8326_MUTE); + regmap_write(es8326->regmap, ES8326_ADC_MUTE, 0x0f); es8326->jack_remove_retry = 0; es8326->hp = 0; -- GitLab From b53cc6144a3f6c8b56afcdec89d81195c9b0dc69 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:17 +0100 Subject: [PATCH 0085/1405] ASoC: codecs: wsa883x: fix PA volume control The PA gain can be set in steps of 1.5 dB from -3 dB to 18 dB, that is, in 15 levels. Fix the dB values for the PA volume control as experiments using wsa8835 show that the first 16 levels all map to the same lowest gain while the last three map to the highest gain. These values specifically need to be correct for the sound server to provide proper volume control. Note that level 0 (-3 dB) does not mute the PA so the mute flag should also not be set. Fixes: cdb09e623143 ("ASoC: codecs: wsa883x: add control, dapm widgets and map") Cc: stable@vger.kernel.org # 6.0 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-2-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa883x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index cb83c569e18d6..a2e86ef7d18f5 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1098,7 +1098,11 @@ static int wsa_dev_mode_put(struct snd_kcontrol *kcontrol, return 1; } -static const DECLARE_TLV_DB_SCALE(pa_gain, -300, 150, -300); +static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(pa_gain, + 0, 14, TLV_DB_SCALE_ITEM(-300, 0, 0), + 15, 29, TLV_DB_SCALE_ITEM(-300, 150, 0), + 30, 31, TLV_DB_SCALE_ITEM(1800, 0, 0), +); static int wsa883x_get_swr_port(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) -- GitLab From 46188db080bd1df7d2d28031b89e56f2fdbabd67 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:19 +0100 Subject: [PATCH 0086/1405] ASoC: codecs: lpass-wsa-macro: fix compander volume hack The LPASS WSA macro codec driver is updating the digital gain settings behind the back of user space on DAPM events if companding has been enabled. As compander control is exported to user space, this can result in the digital gain setting being incremented (or decremented) every time the sound server is started and the codec suspended depending on what the UCM configuration looks like. Soon enough playback will become distorted (or too quiet). This is specifically a problem on the Lenovo ThinkPad X13s as this bypasses the limit for the digital gain setting that has been set by the machine driver. Fix this by simply dropping the compander gain offset hack. If someone cares about modelling the impact of the compander setting this can possibly be done by exporting it as a volume control later. Note that the volume registers still need to be written after enabling clocks in order for any prior updates to take effect. Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route") Cc: stable@vger.kernel.org # 5.11 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-4-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 7e21cec3c2fb9..6ce309980cd10 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1584,7 +1584,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; struct wsa_macro *wsa = snd_soc_component_get_drvdata(component); if (w->shift == WSA_MACRO_COMP1) { @@ -1623,10 +1622,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_ENABLE); - offset_val = -2; } val = snd_soc_component_read(component, gain_reg); - val += offset_val; snd_soc_component_write(component, gain_reg, val); wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); @@ -1654,10 +1651,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w, CDC_WSA_RX1_RX_PATH_MIX_SEC0, CDC_WSA_RX_PGA_HALF_DB_MASK, CDC_WSA_RX_PGA_HALF_DB_DISABLE); - offset_val = 2; - val = snd_soc_component_read(component, gain_reg); - val += offset_val; - snd_soc_component_write(component, gain_reg, val); } wsa_macro_config_ear_spkr_gain(component, wsa, event, gain_reg); -- GitLab From aafa3acf62f1f63e620753fb9fd75095325617b2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 19 Jan 2024 12:24:20 +0100 Subject: [PATCH 0087/1405] ASoC: codecs: wcd9335: drop unused gain hack remnant The vendor driver appears to be modifying the gain settings behind the back of user space but these hacks never made it upstream except for some essentially dead code that adds a constant zero to the current gain setting on DAPM events. Note that the volume registers still need to be written after enabling clocks in order for any prior updates to take effect. Reviewed-by: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240119112420.7446-5-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd9335.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 43c648efd0d93..deb15b95992d5 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -3033,7 +3033,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w, { struct snd_soc_component *comp = snd_soc_dapm_to_component(w->dapm); u16 gain_reg; - int offset_val = 0; int val = 0; switch (w->reg) { @@ -3073,7 +3072,6 @@ static int wcd9335_codec_enable_mix_path(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMU: val = snd_soc_component_read(comp, gain_reg); - val += offset_val; snd_soc_component_write(comp, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: @@ -3294,7 +3292,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w, u16 gain_reg; u16 reg; int val; - int offset_val = 0; if (!(snd_soc_dapm_widget_name_cmp(w, "RX INT0 INTERP"))) { reg = WCD9335_CDC_RX0_RX_PATH_CTL; @@ -3337,7 +3334,6 @@ static int wcd9335_codec_enable_interpolator(struct snd_soc_dapm_widget *w, case SND_SOC_DAPM_POST_PMU: wcd9335_config_compander(comp, w->shift, event); val = snd_soc_component_read(comp, gain_reg); - val += offset_val; snd_soc_component_write(comp, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: -- GitLab From 4d0e8bdfa4a57099dc7230952a460903f2e2f8de Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 10:11:30 +0100 Subject: [PATCH 0088/1405] ASoC: codecs: wcd938x: fix headphones volume controls The lowest headphones volume setting does not mute so the leave the TLV mute flag unset. This is specifically needed to let the sound server use the lowest gain setting. Fixes: c03226ba15fe ("ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR") Cc: # 6.5 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122091130.27463-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index faf8d3f9b3c5d..98055dd39b782 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -210,7 +210,7 @@ struct wcd938x_priv { }; static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800); -static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000); +static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000); struct wcd938x_mbhc_zdet_param { -- GitLab From 1d565de8d53cfa823576abac84e82ab1561f04eb Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:19 +0530 Subject: [PATCH 0089/1405] ASoC: amd: acp: Enable rt5682s clocks in acp slave mode Set and enable rt5682s codec bclk and lrclk rates when acp is in slave mode. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-1-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-mach-common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index c90ec34192477..a224043ccd425 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -505,6 +505,13 @@ static int acp_card_rt5682s_hw_params(struct snd_pcm_substream *substream, clk_set_rate(drvdata->wclk, srate); clk_set_rate(drvdata->bclk, srate * ch * format); + if (!drvdata->soc_mclk) { + ret = acp_clk_enable(drvdata, srate, ch * format); + if (ret < 0) { + dev_err(rtd->card->dev, "Failed to enable HS clk: %d\n", ret); + return ret; + } + } return 0; } -- GitLab From 4bae2029ffcccfbefb8f31563556494464e7bf2d Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:20 +0530 Subject: [PATCH 0090/1405] ASoC: amd: acp: Update platform name for different boards Update platform name for various boards based on rembrandt and renoir platforms. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-2-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-sof-mach.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/amd/acp/acp-sof-mach.c b/sound/soc/amd/acp/acp-sof-mach.c index 2a9fd3275e42f..20b94814a0462 100644 --- a/sound/soc/amd/acp/acp-sof-mach.c +++ b/sound/soc/amd/acp/acp-sof-mach.c @@ -48,6 +48,7 @@ static struct acp_card_drvdata sof_rt5682s_rt1019_data = { .hs_codec_id = RT5682S, .amp_codec_id = RT1019, .dmic_codec_id = DMIC, + .platform = RENOIR, .tdm_mode = false, }; @@ -58,6 +59,7 @@ static struct acp_card_drvdata sof_rt5682s_max_data = { .hs_codec_id = RT5682S, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, + .platform = RENOIR, .tdm_mode = false, }; @@ -68,6 +70,7 @@ static struct acp_card_drvdata sof_nau8825_data = { .hs_codec_id = NAU8825, .amp_codec_id = MAX98360A, .dmic_codec_id = DMIC, + .platform = REMBRANDT, .soc_mclk = true, .tdm_mode = false, }; @@ -79,6 +82,7 @@ static struct acp_card_drvdata sof_rt5682s_hs_rt1019_data = { .hs_codec_id = RT5682S, .amp_codec_id = RT1019, .dmic_codec_id = DMIC, + .platform = REMBRANDT, .soc_mclk = true, .tdm_mode = false, }; -- GitLab From 6cc2aa9a75f2397d42b78d4c159bc06722183c78 Mon Sep 17 00:00:00 2001 From: Venkata Prasad Potturu Date: Thu, 18 Jan 2024 20:00:21 +0530 Subject: [PATCH 0091/1405] ASoC: amd: acp: Add check for cpu dai link initialization Add condition check for cpu dai link initialization for amplifier codec path, as same pcm id uses for both headset and speaker path for RENOIR platforms. Signed-off-by: Venkata Prasad Potturu Link: https://msgid.link/r/20240118143023.1903984-3-venkataprasad.potturu@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp-mach-common.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index a224043ccd425..504d1b8c4cbb4 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -1471,8 +1471,13 @@ int acp_sofdsp_dai_links_create(struct snd_soc_card *card) if (drv_data->amp_cpu_id == I2S_SP) { links[i].name = "acp-amp-codec"; links[i].id = AMP_BE_ID; - links[i].cpus = sof_sp_virtual; - links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + if (drv_data->platform == RENOIR) { + links[i].cpus = sof_sp; + links[i].num_cpus = ARRAY_SIZE(sof_sp); + } else { + links[i].cpus = sof_sp_virtual; + links[i].num_cpus = ARRAY_SIZE(sof_sp_virtual); + } links[i].platforms = sof_component; links[i].num_platforms = ARRAY_SIZE(sof_component); links[i].dpcm_playback = 1; -- GitLab From 086df711d9b886194481b4fbe525eb43e9ae7403 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:06 +0100 Subject: [PATCH 0092/1405] ASoC: codecs: wcd938x: handle deferred probe WCD938x sound codec driver ignores return status of getting regulators and returns EINVAL instead of EPROBE_DEFER. If regulator provider probes after the codec, system is left without probed audio: wcd938x_codec audio-codec: wcd938x_probe: Fail to obtain platform data wcd938x_codec: probe of audio-codec failed with error -22 Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 98055dd39b782..75834ed0365d9 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3589,7 +3589,7 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_populate_dt_data(wcd938x, dev); if (ret) { dev_err(dev, "%s: Fail to obtain platform data\n", __func__); - return -EINVAL; + return ret; } ret = wcd938x_add_slave_components(wcd938x, dev, &match); -- GitLab From 22221b13d0c20a9791dec33121df73fe0b2ac226 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:07 +0100 Subject: [PATCH 0093/1405] ASoC: codecs: wcd938x: skip printing deferred probe failuers Probe calls wcd938x_populate_dt_data() which already prints all the error cases with dev_err_probe(), so skip the additional dev_err(). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 75834ed0365d9..6021aa5a56891 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3587,10 +3587,8 @@ static int wcd938x_probe(struct platform_device *pdev) mutex_init(&wcd938x->micb_lock); ret = wcd938x_populate_dt_data(wcd938x, dev); - if (ret) { - dev_err(dev, "%s: Fail to obtain platform data\n", __func__); + if (ret) return ret; - } ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) -- GitLab From 35314e39dabcfb256832654ad0e856a9fba744bd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 16:12:08 +0100 Subject: [PATCH 0094/1405] ASoC: codecs: wcd934x: drop unneeded regulator include Driver does not use any regulator code, so drop redundant include of regulator/consumer.h header. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117151208.1219755-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 1b6e376f3833c..6813268e6a19f 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include -- GitLab From 70b4769956651e986591dd94b3ff9649122b1513 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 Jan 2024 17:01:44 +0100 Subject: [PATCH 0095/1405] ASoC: allow up to eight CPU/codec DAIs Sound card on Qualcomm X1E80100 CRD board will use eight DAIs in one DAI link, so increase the limit. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240117160144.1305127-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f8524b5bfb330..516350533e73f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1037,7 +1037,7 @@ static int soc_dai_link_sanity_check(struct snd_soc_card *card, return -EINVAL; } -#define MAX_DEFAULT_CH_MAP_SIZE 7 +#define MAX_DEFAULT_CH_MAP_SIZE 8 static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, { .cpu = 1, .codec = 1 }, @@ -1046,6 +1046,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_sync[MAX_DEFAULT_CH_MAP_SIZ { .cpu = 4, .codec = 4 }, { .cpu = 5, .codec = 5 }, { .cpu = 6, .codec = 6 }, + { .cpu = 7, .codec = 7 }, }; static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, @@ -1055,6 +1056,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1cpu[MAX_DEFAULT_CH_MAP_SIZ { .cpu = 0, .codec = 4 }, { .cpu = 0, .codec = 5 }, { .cpu = 0, .codec = 6 }, + { .cpu = 0, .codec = 7 }, }; static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_SIZE] = { { .cpu = 0, .codec = 0 }, @@ -1064,6 +1066,7 @@ static struct snd_soc_dai_link_ch_map default_ch_map_1codec[MAX_DEFAULT_CH_MAP_S { .cpu = 4, .codec = 0 }, { .cpu = 5, .codec = 0 }, { .cpu = 6, .codec = 0 }, + { .cpu = 7, .codec = 0 }, }; static int snd_soc_compensate_channel_connection_map(struct snd_soc_card *card, struct snd_soc_dai_link *dai_link) -- GitLab From c92688cac239794e4a1d976afa5203a4d3a2ac0e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:26 +0100 Subject: [PATCH 0096/1405] regulator: pwm-regulator: Add validity checks in continuous .get_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continuous regulators can be configured to operate only in a certain duty cycle range (for example from 0..91%). Add a check to error out if the duty cycle translates to an unsupported (or out of range) voltage. Suggested-by: Uwe Kleine-König Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 698c420e0869b..226ca4c62673f 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -158,6 +158,9 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. -- GitLab From 6a7d11efd6915d80a025f2a0be4ae09d797b91ec Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:27 +0100 Subject: [PATCH 0097/1405] regulator: pwm-regulator: Calculate the output voltage for disabled PWMs If a PWM output is disabled then it's voltage has to be calculated based on a zero duty cycle (for normal polarity) or duty cycle being equal to the PWM period (for inverted polarity). Add support for this to pwm_regulator_get_voltage(). Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 226ca4c62673f..d27b9a7a30c9b 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -157,6 +157,13 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); + if (!pstate.enabled) { + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + } + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); if (voltage < min(max_uV_duty, min_uV_duty) || voltage > max(max_uV_duty, min_uV_duty)) -- GitLab From b3cbdcc191819b75c04178142e2d0d4c668f68c0 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 13 Jan 2024 23:46:28 +0100 Subject: [PATCH 0098/1405] regulator: pwm-regulator: Manage boot-on with disabled PWM channels Odroid-C1 uses a Monolithic Power Systems MP2161 controlled via PWM for the VDDEE voltage supply of the Meson8b SoC. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") results in my Odroid-C1 crashing with memory corruption in many different places (seemingly at random). It turns out that this is due to a currently not supported corner case. The VDDEE regulator can generate between 860mV (duty cycle of ~91%) and 1140mV (duty cycle of 0%). We consider it to be enabled by the bootloader (which is why it has the regulator-boot-on flag in .dts) as well as being always-on (which is why it has the regulator-always-on flag in .dts) because the VDDEE voltage is generally required for the Meson8b SoC to work. The public S805 datasheet [0] states on page 17 (where "A5" refers to the Cortex-A5 CPU cores): [...] So if EE domains is shut off, A5 memory is also shut off. That does not matter. Before EE power domain is shut off, A5 should be shut off at first. It turns out that at least some bootloader versions are keeping the PWM output disabled. This is not a problem due to the specific design of the regulator: when the PWM output is disabled the output pin is pulled LOW, effectively achieving a 0% duty cycle (which in return means that VDDEE voltage is at 1140mV). The problem comes when the pwm-regulator driver tries to initialize the PWM output. To do so it reads the current state from the hardware, which is: period: 3666ns duty cycle: 3333ns (= ~91%) enabled: false Then those values are translated using the continuous voltage range to 860mV. Later, when the regulator is being enabled (either by the regulator core due to the always-on flag or first consumer - in this case the lima driver for the Mali-450 GPU) the pwm-regulator driver tries to keep the voltage (at 860mV) and just enable the PWM output. This is when things start to go wrong as the typical voltage used for VDDEE is 1100mV. Commit 6b9352f3f8a1 ("pwm: meson: modify and simplify calculation in meson_pwm_get_state") triggers above condition as before that change period and duty cycle were both at 0. Since the change to the pwm-meson driver is considered correct the solution is to be found in the pwm-regulator driver. Update the duty cycle during driver probe if the regulator is flagged as boot-on so that a call to pwm_regulator_enable() (by the regulator core during initialization of a regulator flagged with boot-on) without any preceding call to pwm_regulator_set_voltage() does not change the output voltage. [0] https://dn.odroid.com/S805/Datasheet/S805_Datasheet%20V0.8%2020150126.pdf Signed-off-by: Martin Blumenstingl Link: https://msgid.link/r/20240113224628.377993-4-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- drivers/regulator/pwm-regulator.c | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index d27b9a7a30c9b..60cfcd741c2af 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -323,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev, return 0; } +static int pwm_regulator_init_boot_on(struct platform_device *pdev, + struct pwm_regulator_data *drvdata, + const struct regulator_init_data *init_data) +{ + struct pwm_state pstate; + + if (!init_data->constraints.boot_on || drvdata->enb_gpio) + return 0; + + pwm_get_state(drvdata->pwm, &pstate); + if (pstate.enabled) + return 0; + + /* + * Update the duty cycle so the output does not change + * when the regulator core enables the regulator (and + * thus the PWM channel). + */ + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + + return pwm_apply_might_sleep(drvdata->pwm, &pstate); +} + static int pwm_regulator_probe(struct platform_device *pdev) { const struct regulator_init_data *init_data; @@ -382,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev) if (ret) return ret; + ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data); + if (ret) { + dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n", + ret); + return ret; + } + regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); if (IS_ERR(regulator)) { -- GitLab From 6154fb9c2134f8d9534b2de10491aa3a22f3c9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 22 Jan 2024 11:29:18 -0300 Subject: [PATCH 0099/1405] kselftest: dt: Stop relying on dirname to improve performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When walking directory trees, instead of looking for specific files and running dirname to get the parent folder, traverse all folders and ignore the ones not containing the desired files. This avoids the need to call dirname inside the loop, which drastically decreases run time: Running locally on a mt8192-asurada-spherion, which reports 160 test cases, has gone from 5.5s to 2.9s, while running remotely with an nfsroot has gone from 13.5s to 5.5s. This change has a side-effect, which is that the root DT node now also shows in the output, even though it isn't expected to bind to a driver. However there shouldn't be a matching driver for the board compatible, so the end result will be just an extra skipped test: ok 1 / # SKIP Reported-by: Mark Brown Closes: https://lore.kernel.org/all/310391e8-fdf2-4c2f-a680-7744eb685177@sirena.org.uk Fixes: 14571ab1ad21 ("kselftest: Add new test for detecting unprobed Devicetree devices") Tested-by: Mark Brown Signed-off-by: Nícolas F. R. A. Prado Link: https://lore.kernel.org/r/20240122-dt-kselftest-dirname-perf-fix-v2-1-f1630532fd38@collabora.com Signed-off-by: Rob Herring --- tools/testing/selftests/dt/test_unprobed_devices.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh index b07af2a4c4de0..7fae90293a9d8 100755 --- a/tools/testing/selftests/dt/test_unprobed_devices.sh +++ b/tools/testing/selftests/dt/test_unprobed_devices.sh @@ -33,8 +33,8 @@ if [[ ! -d "${PDT}" ]]; then fi nodes_compatible=$( - for node_compat in $(find ${PDT} -name compatible); do - node=$(dirname "${node_compat}") + for node in $(find ${PDT} -type d); do + [ ! -f "${node}"/compatible ] && continue # Check if node is available if [[ -e "${node}"/status ]]; then status=$(tr -d '\000' < "${node}"/status) @@ -46,10 +46,11 @@ nodes_compatible=$( nodes_dev_bound=$( IFS=$'\n' - for uevent in $(find /sys/devices -name uevent); do - if [[ -d "$(dirname "${uevent}")"/driver ]]; then - grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||' - fi + for dev_dir in $(find /sys/devices -type d); do + [ ! -f "${dev_dir}"/uevent ] && continue + [ ! -d "${dev_dir}"/driver ] && continue + + grep '^OF_FULLNAME=' "${dev_dir}"/uevent | sed -e 's|OF_FULLNAME=||' done ) -- GitLab From 6dd9a236042e305d7b69ee92db7347bf5943e7d3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Jan 2024 16:04:14 +0100 Subject: [PATCH 0100/1405] soc: microchip: Fix POLARFIRE_SOC_SYS_CTRL input prompt The symbol's prompt should be a one-line description, instead of just duplicating the symbol name. Signed-off-by: Geert Uytterhoeven Signed-off-by: Conor Dooley --- drivers/soc/microchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index 9b0fdd95276e4..19f4b576f822b 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -1,5 +1,5 @@ config POLARFIRE_SOC_SYS_CTRL - tristate "POLARFIRE_SOC_SYS_CTRL" + tristate "Microchip PolarFire SoC (MPFS) system controller support" depends on POLARFIRE_SOC_MAILBOX depends on MTD help -- GitLab From d53271c05965b4469c57a18c66585075df81c504 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 11 Jan 2024 10:49:22 -0500 Subject: [PATCH 0101/1405] selftests/rseq: Do not skip !allowed_cpus for mm_cid Indexing with mm_cid is incompatible with skipping disallowed cpumask, because concurrency IDs are based on a virtual ID allocation which is unrelated to the physical CPU mask. These issues can be reproduced by running the rseq selftests under a taskset which excludes CPU 0, e.g. taskset -c 10-20 ./run_param_test.sh Signed-off-by: Mathieu Desnoyers Cc: Shuah Khan Cc: Peter Zijlstra Cc: "Paul E. McKenney" Cc: Boqun Feng Signed-off-by: Shuah Khan --- .../selftests/rseq/basic_percpu_ops_test.c | 14 ++++++++++-- tools/testing/selftests/rseq/param_test.c | 22 ++++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 8875429619687..2348d2c20d0a1 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} #else # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID static @@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} #endif struct percpu_lock_entry { @@ -274,7 +284,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -299,7 +309,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 20403d58345cd..2f37961240caa 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} # ifdef TEST_MEMBARRIER /* * Membarrier does not currently support targeting a mm_cid, so @@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} # ifdef TEST_MEMBARRIER static int rseq_membarrier_expedited(int cpu) @@ -715,7 +725,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -752,7 +762,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { @@ -902,7 +912,7 @@ void test_percpu_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -952,7 +962,7 @@ void test_percpu_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_buffer_pop(&buffer, i))) { @@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { -- GitLab From 95a0d596bbd0552a78e13ced43f2be1038883c81 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 8 Dec 2023 15:31:19 +0800 Subject: [PATCH 0102/1405] iio: core: fix memleak in iio_device_register_sysfs When iio_device_register_sysfs_group() fails, we should free iio_dev_opaque->chan_attr_group.attrs to prevent potential memleak. Fixes: 32f171724e5c ("iio: core: rework iio device group creation") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20231208073119.29283-1-dinghao.liu@zju.edu.cn Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9a85752124ddc..173dc00762a15 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1584,10 +1584,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); -- GitLab From b67f3e653e305abf1471934d7b9fdb9ad2df3eef Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Wed, 20 Dec 2023 12:47:53 -0600 Subject: [PATCH 0103/1405] iio: pressure: bmp280: Add missing bmp085 to SPI id table "bmp085" is missing in bmp280_spi_id[] table, which leads to the next warning in dmesg: SPI driver bmp280 has no spi_device_id for bosch,bmp085 Add "bmp085" to bmp280_spi_id[] by mimicking its existing description in bmp280_of_spi_match[] table to fix the above warning. Signed-off-by: Sam Protsenko Fixes: b26b4e91700f ("iio: pressure: bmp280: add SPI interface driver") Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index 433d6fac83c4c..e8a5fed07e888 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -87,6 +87,7 @@ static const struct of_device_id bmp280_of_spi_match[] = { MODULE_DEVICE_TABLE(of, bmp280_of_spi_match); static const struct spi_device_id bmp280_spi_id[] = { + { "bmp085", (kernel_ulong_t)&bmp180_chip_info }, { "bmp180", (kernel_ulong_t)&bmp180_chip_info }, { "bmp181", (kernel_ulong_t)&bmp180_chip_info }, { "bmp280", (kernel_ulong_t)&bmp280_chip_info }, -- GitLab From 792595bab4925aa06532a14dd256db523eb4fa5e Mon Sep 17 00:00:00 2001 From: "zhili.liu" Date: Tue, 2 Jan 2024 09:07:11 +0800 Subject: [PATCH 0104/1405] iio: magnetometer: rm3100: add boundary check for the value read from RM3100_REG_TMRC Recently, we encounter kernel crash in function rm3100_common_probe caused by out of bound access of array rm3100_samp_rates (because of underlying hardware failures). Add boundary check to prevent out of bound access. Fixes: 121354b2eceb ("iio: magnetometer: Add driver support for PNI RM3100") Suggested-by: Zhouyi Zhou Signed-off-by: zhili.liu Link: https://lore.kernel.org/r/1704157631-3814-1-git-send-email-zhouzhouyi@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/rm3100-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 69938204456f8..42b70cd42b393 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) -- GitLab From 35ec2d03b282a939949090bd8c39eb37a5856721 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Jan 2024 10:56:11 -0800 Subject: [PATCH 0105/1405] iio: imu: bno055: serdev requires REGMAP There are a ton of build errors when REGMAP is not set, so select REGMAP to fix all of them. Examples (not all of them): ../drivers/iio/imu/bno055/bno055_ser_core.c:495:15: error: variable 'bno055_ser_regmap_bus' has initializer but incomplete type 495 | static struct regmap_bus bno055_ser_regmap_bus = { ../drivers/iio/imu/bno055/bno055_ser_core.c:496:10: error: 'struct regmap_bus' has no member named 'write' 496 | .write = bno055_ser_write_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c:497:10: error: 'struct regmap_bus' has no member named 'read' 497 | .read = bno055_ser_read_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c: In function 'bno055_ser_probe': ../drivers/iio/imu/bno055/bno055_ser_core.c:532:18: error: implicit declaration of function 'devm_regmap_init'; did you mean 'vmem_map_init'? [-Werror=implicit-function-declaration] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c:532:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c: At top level: ../drivers/iio/imu/bno055/bno055_ser_core.c:495:26: error: storage size of 'bno055_ser_regmap_bus' isn't known 495 | static struct regmap_bus bno055_ser_regmap_bus = { Fixes: 2eef5a9cc643 ("iio: imu: add BNO055 serdev driver") Signed-off-by: Randy Dunlap Cc: Andrea Merello Cc: Jonathan Cameron Cc: Lars-Peter Clausen Cc: linux-iio@vger.kernel.org Cc: Link: https://lore.kernel.org/r/20240110185611.19723-1-rdunlap@infradead.org Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bno055/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfbe880..c7f5866a177d9 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. -- GitLab From f1dfcbaa7b9d131859b0167c428480ae6e7e817d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 21 Jan 2024 13:46:16 +0000 Subject: [PATCH 0106/1405] iio: humidity: hdc3020: Add Makefile, Kconfig and MAINTAINERS entry Something when wrong when applying the original patch and only the c file made it in. Here the rest of the changes are applied. Fixes: c9180b8e39be ("iio: humidity: Add driver for ti HDC302x humidity sensors") Reported-by: Lars-Peter Clausen Cc: Javier Carrasco Cc: Li peiyu <579lpy@gmail.com> Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- MAINTAINERS | 8 ++++++++ drivers/iio/humidity/Kconfig | 12 ++++++++++++ drivers/iio/humidity/Makefile | 1 + 3 files changed, 21 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..1b590e6f6bddd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22005,6 +22005,14 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90* F: drivers/media/i2c/ds90* F: include/media/i2c/ds90* +TI HDC302X HUMIDITY DRIVER +M: Javier Carrasco +M: Li peiyu <579lpy@gmail.com> +L: linux-iio@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml +F: drivers/iio/humidity/hdc3020.c + TI ICSSG ETHERNET DRIVER (ICSSG) R: MD Danish Anwar R: Roger Quadros diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 2de5494e7c225..b15b7a3b66d5a 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -48,6 +48,18 @@ config HDC2010 To compile this driver as a module, choose M here: the module will be called hdc2010. +config HDC3020 + tristate "TI HDC3020 relative humidity and temperature sensor" + depends on I2C + select CRC8 + help + Say yes here to build support for the Texas Instruments + HDC3020, HDC3021 and HDC3022 relative humidity and temperature + sensors. + + To compile this driver as a module, choose M here: the module + will be called hdc3020. + config HID_SENSOR_HUMIDITY tristate "HID Environmental humidity sensor" depends on HID_SENSOR_HUB diff --git a/drivers/iio/humidity/Makefile b/drivers/iio/humidity/Makefile index f19ff3de97c56..5fbeef299f61b 100644 --- a/drivers/iio/humidity/Makefile +++ b/drivers/iio/humidity/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_AM2315) += am2315.o obj-$(CONFIG_DHT11) += dht11.o obj-$(CONFIG_HDC100X) += hdc100x.o obj-$(CONFIG_HDC2010) += hdc2010.o +obj-$(CONFIG_HDC3020) += hdc3020.o obj-$(CONFIG_HID_SENSOR_HUMIDITY) += hid-sensor-humidity.o hts221-y := hts221_core.o \ -- GitLab From 8e98b87f515d8c4bae521048a037b2cc431c3fd5 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: [PATCH 0107/1405] iio: imu: adis: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e0885..8898966bc0f08 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include #include +#include #include #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; -- GitLab From 59598510be1d49e1cff7fd7593293bb8e1b2398b Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: [PATCH 0108/1405] iio: adc: ad_sigma_delta: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714c..719cf9cc6e1ac 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; -- GitLab From 9c46e3a5232d855a65f440b298a2a66497f799d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Jan 2024 12:07:02 +0300 Subject: [PATCH 0109/1405] iio: adc: ad7091r8: Fix error code in ad7091r8_gpio_setup() There is a copy and paste error so it accidentally returns ->convst_gpio instead of ->reset_gpio. Fix it. Fixes: 0b76ff46c463 ("iio: adc: Add support for AD7091R-8") Signed-off-by: Dan Carpenter Reviewed-by: Marcelo Schmitt Link: https://lore.kernel.org/r/fd905ad0-6413-489c-9a3b-90c0cdb35ec9@moroto.mountain Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7091r8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7091r8.c b/drivers/iio/adc/ad7091r8.c index 57700f1248038..7005643050575 100644 --- a/drivers/iio/adc/ad7091r8.c +++ b/drivers/iio/adc/ad7091r8.c @@ -195,7 +195,7 @@ static int ad7091r8_gpio_setup(struct ad7091r_state *st) st->reset_gpio = devm_gpiod_get_optional(st->dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(st->reset_gpio)) - return dev_err_probe(st->dev, PTR_ERR(st->convst_gpio), + return dev_err_probe(st->dev, PTR_ERR(st->reset_gpio), "Error on requesting reset GPIO\n"); if (st->reset_gpio) { -- GitLab From 92c02d74ba7b7cdf3887ed56bc9af38c3ee17a8b Mon Sep 17 00:00:00 2001 From: Fei Shao Date: Mon, 22 Jan 2024 14:20:27 +0800 Subject: [PATCH 0110/1405] ASoC: codecs: ES8326: Remove executable bit Remove the executable bit that was unintentionally turned on. Fixes: ee09084fbf9f ("ASoC: codecs: ES8326: Add chip version flag") Signed-off-by: Fei Shao Link: https://msgid.link/r/20240122062055.1673597-1-fshao@chromium.org Signed-off-by: Mark Brown --- sound/soc/codecs/es8326.c | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 sound/soc/codecs/es8326.c diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c old mode 100755 new mode 100644 -- GitLab From c481016bb4f8a9c059c39ac06e7b65e233a61f6a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Jan 2024 19:18:17 +0100 Subject: [PATCH 0111/1405] ASoC: qcom: sc8280xp: limit speaker volumes The UCM configuration for the Lenovo ThinkPad X13s has up until now been setting the speaker PA volume to the minimum -3 dB when enabling the speakers, but this does not prevent the user from increasing the volume further. Limit the digital gain and PA volumes to a combined -3 dB in the machine driver to reduce the risk of speaker damage until we have active speaker protection in place (or higher safe levels have been established). Note that the PA volume limit cannot be set lower than 0 dB or PulseAudio gets confused when the first 16 levels all map to -3 dB. Also note that this will probably need to be generalised using machine-specific limits, but a common limit should do for now. Cc: # 6.5 Signed-off-by: Johan Hovold Link: https://msgid.link/r/20240122181819.4038-3-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/qcom/sc8280xp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index ed4bb551bfbb9..b7fd503a16666 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -32,12 +32,14 @@ static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd) case WSA_CODEC_DMA_RX_0: case WSA_CODEC_DMA_RX_1: /* - * set limit of 0dB on Digital Volume for Speakers, - * this can prevent damage of speakers to some extent without - * active speaker protection + * Set limit of -3 dB on Digital Volume and 0 dB on PA Volume + * to reduce the risk of speaker damage until we have active + * speaker protection in place. */ - snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84); - snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84); + snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 81); + snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 81); + snd_soc_limit_volume(card, "SpkrLeft PA Volume", 17); + snd_soc_limit_volume(card, "SpkrRight PA Volume", 17); break; default: break; -- GitLab From afa6ac2690bb9904ff883c6e942281e1032a484d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 22 Jan 2024 21:32:01 +0100 Subject: [PATCH 0112/1405] HID: logitech-hidpp: add support for Logitech G Pro X Superlight 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let logitech-hidpp driver claim Logitech G Pro X Superlight 2. Reported-by: Marcus Rückert Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index fd6d8f1d9b8f6..6ef0c88e3e60a 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4610,6 +4610,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, + { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), -- GitLab From 3526860f26febbe46960f9b37f5dbd5ccc109ea8 Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:12 +0000 Subject: [PATCH 0113/1405] ALSA: hda: Replace numeric device IDs with constant values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have self-explanatory constants for Intel HDA devices, let's use them instead of magic numbers and code comments. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-2-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2276adc844784..66f013ee160d2 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1729,8 +1729,8 @@ static int default_bdl_pos_adj(struct azx *chip) /* some exceptions: Atoms seem problematic with value 1 */ if (chip->pci->vendor == PCI_VENDOR_ID_INTEL) { switch (chip->pci->device) { - case 0x0f04: /* Baytrail */ - case 0x2284: /* Braswell */ + case PCI_DEVICE_ID_INTEL_HDA_BYT: + case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; } } -- GitLab From 56beedc88405fd8022edfd1c2e63d1bc6c95efcb Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Mon, 22 Jan 2024 11:45:13 +0000 Subject: [PATCH 0114/1405] ALSA: hda: Increase default bdl_pos_adj for Apollo Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apollo Lake seems to also suffer from IRQ timing issues. After being up for ~4 minutes, a Pentium N4200 system ends up falling back to workqueue-based IRQ handling: [ 208.019906] snd_hda_intel 0000:00:0e.0: IRQ timing workaround is activated for card #0. Suggest a bigger bdl_pos_adj. Unfortunately, the Baytrail and Braswell workaround value of 32 samples isn't enough to fix the issue here. Default to 64 samples. Signed-off-by: Rui Salvaterra Reviewed-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240122114512.55808-3-rsalvaterra@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 66f013ee160d2..1b550c42db092 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1732,6 +1732,8 @@ static int default_bdl_pos_adj(struct azx *chip) case PCI_DEVICE_ID_INTEL_HDA_BYT: case PCI_DEVICE_ID_INTEL_HDA_BSW: return 32; + case PCI_DEVICE_ID_INTEL_HDA_APL: + return 64; } } -- GitLab From a2ed0a44d637ef9deca595054c206da7d6cbdcbc Mon Sep 17 00:00:00 2001 From: Vitaly Rodionov Date: Mon, 22 Jan 2024 18:47:10 +0000 Subject: [PATCH 0115/1405] ALSA: hda/cs8409: Suppress vmaster control for Dolphin models Customer has reported an issue with specific desktop platform where two CS42L42 codecs are connected to CS8409 HDA bridge. If "Master Volume Control" is created then on Ubuntu OS UCM left/right balance slider in UI audio settings has no effect. This patch will fix this issue for a target paltform. Fixes: 20e507724113 ("ALSA: hda/cs8409: Add support for dolphin") Signed-off-by: Vitaly Rodionov Cc: Link: https://lore.kernel.org/r/20240122184710.5802-1-vitalyr@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cs8409.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 627899959ffe8..e41316e2e9833 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -1371,6 +1371,7 @@ void dolphin_fixups(struct hda_codec *codec, const struct hda_fixup *fix, int ac spec->scodecs[CS8409_CODEC1] = &dolphin_cs42l42_1; spec->scodecs[CS8409_CODEC1]->codec = codec; spec->num_scodecs = 2; + spec->gen.suppress_vmaster = 1; codec->patch_ops = cs8409_dolphin_patch_ops; -- GitLab From a969210066054ea109d8b7aff29a9b1c98776841 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 23 Jan 2024 09:49:35 +0100 Subject: [PATCH 0116/1405] ALSA: usb-audio: Add a quirk for Yamaha YIT-W12TX transmitter The device fails to initialize otherwise, giving the following error: [ 3676.671641] usb 2-1.1: 1:1: cannot get freq at ep 0x1 Signed-off-by: Julian Sikorski Cc: Link: https://lore.kernel.org/r/20240123084935.2745-1-belegdol+github@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 07cc6a201579a..43c14a81a1e2f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2031,6 +2031,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M | QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0499, 0x1509, /* Steinberg UR22 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x0499, 0x3108, /* Yamaha YIT-W12TX */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04d8, 0xfeea, /* Benchmark DAC1 Pre */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ -- GitLab From b6eda11c44dc89a681e1c105f0f4660e69b1e183 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 14:07:14 +0800 Subject: [PATCH 0117/1405] HID: nvidia-shield: Add missing null pointer checks to LED initialization devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. [jkosina@suse.com: tweak changelog a bit] Signed-off-by: Kunwu Chan Reviewed-by: Rahul Rameshbabu Signed-off-by: Jiri Kosina --- drivers/hid/hid-nvidia-shield.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 82d0a77359c46..58b15750dbb0a 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); + if (!led->name) + return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; @@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev) shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); + if (!shield_dev->battery_dev.desc.name) + return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); -- GitLab From 26dd6a5667f500c5d991f90a9ac5998a71afaf5c Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 15 Jan 2024 12:50:51 +0800 Subject: [PATCH 0118/1405] HID: i2c-hid: Skip SET_POWER SLEEP for Cirque touchpad on system suspend There's a Cirque touchpad that wakes system up without anything touched the touchpad. The input report is empty when this happens. The reason is stated in HID over I2C spec, 7.2.8.2: "If the DEVICE wishes to wake the HOST from its low power state, it can issue a wake by asserting the interrupt." This is fine if OS can put system back to suspend by identifying input wakeup count stays the same on resume, like Chrome OS Dark Resume [0]. But for regular distro such policy is lacking. Though the change doesn't bring any impact on power consumption for touchpad is minimal, other i2c-hid device may depends on SLEEP control power. So use a quirk to limit the change scope. [0] https://chromium.googlesource.com/chromiumos/platform2/+/HEAD/power_manager/docs/dark_resume.md Signed-off-by: Kai-Heng Feng Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/i2c-hid/i2c-hid-core.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fb30e228d35f9..828a5c022c640 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -298,6 +298,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_1063 0x1063 + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90f316ae9819a..2df1ab3c31cc5 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -49,6 +49,7 @@ #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(2) #define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(3) #define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(4) +#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND BIT(5) /* Command opcodes */ #define I2C_HID_OPCODE_RESET 0x01 @@ -131,6 +132,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESET_ON_RESUME }, { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, I2C_HID_QUIRK_BAD_INPUT_SIZE }, + { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063, + I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND }, /* * Sending the wakeup after reset actually break ELAN touchscreen controller */ @@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff) return ret; /* Save some power */ - i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND)) + i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); disable_irq(client->irq); -- GitLab From c41336f4d69057cbf88fed47951379b384540df5 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 25 Dec 2023 15:36:15 +0200 Subject: [PATCH 0119/1405] pmdomain: mediatek: fix race conditions with genpd If the power domains are registered first with genpd and *after that* the driver attempts to power them on in the probe sequence, then it is possible that a race condition occurs if genpd tries to power them on in the same time. The same is valid for powering them off before unregistering them from genpd. Attempt to fix race conditions by first removing the domains from genpd and *after that* powering down domains. Also first power up the domains and *after that* register them to genpd. Fixes: 59b644b01cf4 ("soc: mediatek: Add MediaTek SCPSYS power domains") Signed-off-by: Eugen Hristev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231225133615.78993-1-eugen.hristev@collabora.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/mediatek/mtk-pm-domains.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/pmdomain/mediatek/mtk-pm-domains.c b/drivers/pmdomain/mediatek/mtk-pm-domains.c index e26dc17d07ad7..e274e3315fe7a 100644 --- a/drivers/pmdomain/mediatek/mtk-pm-domains.c +++ b/drivers/pmdomain/mediatek/mtk-pm-domains.c @@ -561,6 +561,11 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren goto err_put_node; } + /* recursive call to add all subdomains */ + ret = scpsys_add_subdomain(scpsys, child); + if (ret) + goto err_put_node; + ret = pm_genpd_add_subdomain(parent_pd, child_pd); if (ret) { dev_err(scpsys->dev, "failed to add %s subdomain to parent %s\n", @@ -570,11 +575,6 @@ static int scpsys_add_subdomain(struct scpsys *scpsys, struct device_node *paren dev_dbg(scpsys->dev, "%s add subdomain: %s\n", parent_pd->name, child_pd->name); } - - /* recursive call to add all subdomains */ - ret = scpsys_add_subdomain(scpsys, child); - if (ret) - goto err_put_node; } return 0; @@ -588,9 +588,6 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) { int ret; - if (scpsys_domain_is_on(pd)) - scpsys_power_off(&pd->genpd); - /* * We're in the error cleanup already, so we only complain, * but won't emit another error on top of the original one. @@ -600,6 +597,8 @@ static void scpsys_remove_one_domain(struct scpsys_domain *pd) dev_err(pd->scpsys->dev, "failed to remove domain '%s' : %d - state may be inconsistent\n", pd->genpd.name, ret); + if (scpsys_domain_is_on(pd)) + scpsys_power_off(&pd->genpd); clk_bulk_put(pd->num_clks, pd->clks); clk_bulk_put(pd->num_subsys_clks, pd->subsys_clks); -- GitLab From afb2a4fb84555ef9e61061f6ea63ed7087b295d5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Fri, 12 Jan 2024 19:37:29 +0100 Subject: [PATCH 0120/1405] riscv/efistub: Ensure GP-relative addressing is not used The cflags for the RISC-V efistub were missing -mno-relax, thus were under the risk that the compiler could use GP-relative addressing. That happened for _edata with binutils-2.41 and kernel 6.1, causing the relocation to fail due to an invalid kernel_size in handle_kernel_image. It was not yet observed with newer versions, but that may just be luck. Cc: Signed-off-by: Jan Kiszka Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 06964a3c130f6..d561d7de46a99 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE +cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax cflags-$(CONFIG_LOONGARCH) += -fpie cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt -- GitLab From d2baf8cc82c17459fca019a12348efcf86bfec29 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 16 Jan 2024 14:52:27 +0100 Subject: [PATCH 0121/1405] riscv/efistub: Tighten ELF relocation check The EFI stub makefile contains logic to ensure that the objects that make up the stub do not contain relocations that require runtime fixups (typically to account for the runtime load address of the executable) On RISC-V, we also avoid GP based relocations, as they require that GP is assigned the correct base in the startup code, which is not implemented in the EFI stub. So add these relocation types to the grep expression that is used to carry out this check. Link: https://lkml.kernel.org/r/42c63cb9-87d0-49db-9af8-95771b186684%40siemens.com Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d561d7de46a99..73f4810f6db38 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -143,7 +143,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS # exist. STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \ --prefix-symbols=__efistub_ -STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20 +STUBCOPY_RELOC-$(CONFIG_RISCV) := -E R_RISCV_HI20\|R_RISCV_$(BITS)\|R_RISCV_RELAX # For LoongArch, keep all the symbols in .init section and make sure that no # absolute symbols references exist. -- GitLab From 67794f882adca00d043899ac248bc002751da9f6 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Tue, 23 Jan 2024 16:46:35 +0300 Subject: [PATCH 0122/1405] ALSA: usb-audio: Skip setting clock selector for single connections Since commit 086b957cc17f5 ("ALSA: usb-audio: Skip the clock selector inquiry for single connections") we are already skipping clock selector inquiry if only one clock source is connected, but we are still sending a set request. Lets skip that too. This should fix errors when setting a sample rate on devices that don't have any controls present within the clock selector. An example of such device is the new revision of MOTU M Series (07fd:000b): AudioControl Interface Descriptor: bLength 8 bDescriptorType 36 bDescriptorSubtype 11 (CLOCK_SELECTOR) bClockID 1 bNrInPins 1 baCSourceID(0) 2 bmControls 0x00 iClockSelector 0 Perhaps we also should check if clock selectors are readable and writeable like we already do for clock sources, but this is out of scope of this patch. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217601 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240123134635.54026-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 33db334e65566..94e4aaeafe588 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -325,7 +325,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, visited, validate); if (ret > 0) { /* Skip setting clock selector again for some devices */ - if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR) + if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || + pins == 1) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) -- GitLab From a67e1f0bd4564b485e0f0c3ed7f6bf17688be268 Mon Sep 17 00:00:00 2001 From: Romain Naour Date: Tue, 23 Jan 2024 12:14:56 +0100 Subject: [PATCH 0123/1405] regulator: ti-abb: don't use devm_platform_ioremap_resource_byname for shared interrupt register We can't use devm_platform_ioremap_resource_byname() to remap the interrupt register that can be shared between regulator-abb-{ivahd,dspeve,gpu} drivers instances. The combined helper introduce a call to devm_request_mem_region() that creates a new busy resource region on PRM_IRQSTATUS_MPU register (0x4ae06010). The first devm_request_mem_region() call succeeds for regulator-abb-ivahd but fails for the two other regulator-abb-dspeve and regulator-abb-gpu. # cat /proc/iomem | grep -i 4ae06 4ae06010-4ae06013 : 4ae07e34.regulator-abb-ivahd int-address 4ae06014-4ae06017 : 4ae07ddc.regulator-abb-mpu int-address regulator-abb-dspeve and regulator-abb-gpu are missing due to devm_request_mem_region() failure (EBUSY): [ 1.326660] ti_abb 4ae07e30.regulator-abb-dspeve: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.326660] ti_abb: probe of 4ae07e30.regulator-abb-dspeve failed with error -16 [ 1.327239] ti_abb 4ae07de4.regulator-abb-gpu: can't request region for resource [mem 0x4ae06010-0x4ae06013] [ 1.327270] ti_abb: probe of 4ae07de4.regulator-abb-gpu failed with error -16 >From arm/boot/dts/dra7.dtsi: The abb_mpu is the only instance using its own interrupt register: (0x4ae06014) PRM_IRQSTATUS_MPU_2, ABB_MPU_DONE_ST (bit 7) The other tree instances (abb_ivahd, abb_dspeve, abb_gpu) share PRM_IRQSTATUS_MPU register (0x4ae06010) but use different bits ABB_IVA_DONE_ST (bit 30), ABB_DSPEVE_DONE_ST( bit 29) and ABB_GPU_DONE_ST (but 28). The commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") overlooked the following comment implicitly explaining why devm_ioremap() is used in this case: /* * We may have shared interrupt register offsets which are * write-1-to-clear between domains ensuring exclusivity. */ Fixes and partially reverts commit b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related"). Improve the existing comment to avoid further conversion to devm_platform_ioremap_resource_byname(). Fixes: b36c6b1887ff ("regulator: ti-abb: Make use of the helper function devm_ioremap related") Signed-off-by: Romain Naour Reviewed-by: Yoann Congal Link: https://msgid.link/r/20240123111456.739381-1-romain.naour@smile.fr Signed-off-by: Mark Brown --- drivers/regulator/ti-abb-regulator.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index f48214e2c3b46..04133510e5af7 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; -- GitLab From 41353fbad4f551e82c2792f7e82ac225c79cc710 Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Thu, 18 Jan 2024 20:51:45 +0800 Subject: [PATCH 0124/1405] nvmet: unify aer type enum The host and target use two definition of aer type, unify them into a single one. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/discovery.c | 2 +- include/linux/nvme.h | 6 ------ 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index d26aa30f87026..fa35e65915f0c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -248,7 +248,7 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_NS_CHANGED, NVME_LOG_CHANGED_NS); } @@ -265,7 +265,7 @@ void nvmet_send_ana_event(struct nvmet_subsys *subsys, continue; if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE)) continue; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_ANA, NVME_LOG_ANA); } mutex_unlock(&subsys->lock); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 668d257fa9863..68e82ccc0e4e3 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -21,7 +21,7 @@ static void __nvmet_disc_changed(struct nvmet_port *port, if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_DISC_CHANGE)) return; - nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + nvmet_add_async_event(ctrl, NVME_AER_NOTICE, NVME_AER_NOTICE_DISC_CHANGED, NVME_LOG_DISC); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 462c21e0e4176..68eff8c86ce34 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -816,12 +816,6 @@ struct nvme_reservation_status_ext { struct nvme_registered_ctrl_ext regctl_eds[]; }; -enum nvme_async_event_type { - NVME_AER_TYPE_ERROR = 0, - NVME_AER_TYPE_SMART = 1, - NVME_AER_TYPE_NOTICE = 2, -}; - /* I/O commands */ enum nvme_opcode { -- GitLab From 4373534a9850627a2695317944898eb1283a2db0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Jan 2024 15:00:00 +0800 Subject: [PATCH 0125/1405] scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler Inside scsi_eh_wakeup(), scsi_host_busy() is called & checked with host lock every time for deciding if error handler kthread needs to be waken up. This can be too heavy in case of recovery, such as: - N hardware queues - queue depth is M for each hardware queue - each scsi_host_busy() iterates over (N * M) tag/requests If recovery is triggered in case that all requests are in-flight, each scsi_eh_wakeup() is strictly serialized, when scsi_eh_wakeup() is called for the last in-flight request, scsi_host_busy() has been run for (N * M - 1) times, and request has been iterated for (N*M - 1) * (N * M) times. If both N and M are big enough, hard lockup can be triggered on acquiring host lock, and it is observed on mpi3mr(128 hw queues, queue depth 8169). Fix the issue by calling scsi_host_busy() outside the host lock. We don't need the host lock for getting busy count because host the lock never covers that. [mkp: Drop unnecessary 'busy' variables pointed out by Bart] Cc: Ewan Milne Fixes: 6eb045e092ef ("scsi: core: avoid host-wide host_busy counter for scsi_mq") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240112070000.4161982-1-ming.lei@redhat.com Reviewed-by: Ewan D. Milne Reviewed-by: Sathya Prakash Veerichetty Tested-by: Sathya Prakash Veerichetty Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 8 ++++---- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/scsi_priv.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 79da4b1c1df0a..4f455884fdc44 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -286,7 +286,7 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cf3864f720930..1fb80eae9a63a 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -280,7 +280,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 3f0dfb97db6bd..1fbfe1b52c9f1 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, -- GitLab From f4469f3858352ad1197434557150b1f7086762a0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 22 Jan 2024 09:09:56 -0800 Subject: [PATCH 0126/1405] scsi: storvsc: Fix ring buffer size calculation Current code uses the specified ring buffer size (either the default of 128 Kbytes or a module parameter specified value) to encompass the one page ring buffer header plus the actual ring itself. When the page size is 4K, carving off one page for the header isn't significant. But when the page size is 64K on ARM64, only half of the default 128 Kbytes is left for the actual ring. While this doesn't break anything, the smaller ring size could be a performance bottleneck. Fix this by applying the VMBUS_RING_SIZE macro to the specified ring buffer size. This macro adds a page for the header, and rounds up the size to a page boundary, using the page size for which the kernel is built. Use this new size for subsequent ring buffer calculations. For example, on ARM64 with 64K page size and the default ring size, this results in the actual ring being 128 Kbytes, which is intended. Cc: stable@vger.kernel.org # 5.15.x Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240122170956.496436-1-mhklinux@outlook.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a95936b18f695..7ceb982040a5d 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -330,6 +330,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); -- GitLab From f74c35b630d40d414ca6b53f7b1b468dd4abf478 Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Tue, 23 Jan 2024 18:09:19 +0200 Subject: [PATCH 0127/1405] phy: qcom-qmp-usb: fix register offsets for ipq8074/ipq6018 Commit 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") introduced register offsets to the driver but for ipq8074/ipq6018 they do not match what was in the old style device tree. Example from old ipq6018.dtsi: <0x00078200 0x130>, /* Tx */ <0x00078400 0x200>, /* Rx */ <0x00078800 0x1f8>, /* PCS */ <0x00078600 0x044>; /* PCS misc */ which would translate to: {.., .pcs = 0x800, .pcs_misc = 0x600, .tx = 0x200, .rx = 0x400 } but was translated to: {.., .pcs = 0x600, .tx = 0x200, .rx = 0x400 } So split usb_offsets and fix USB initialization for IPQ8074 and IPQ6018. Tested only on IPQ6018 Fixes: 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") Signed-off-by: Mantas Pucka Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1706026160-17520-2-git-send-email-mantas@8devices.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 243cc2b9a0fb6..05b4c0e678962 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1556,6 +1556,14 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; +static const struct qmp_usb_offsets qmp_usb_offsets_ipq8074 = { + .serdes = 0, + .pcs = 0x800, + .pcs_misc = 0x600, + .tx = 0x200, + .rx = 0x400, +}; + static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = { .serdes = 0, .pcs = 0x800, @@ -1616,7 +1624,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_v3, + .offsets = &qmp_usb_offsets_ipq8074, .serdes_tbl = ipq8074_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq8074_usb3_serdes_tbl), -- GitLab From 62a5df451ab911421da96655fcc4d1e269ff6e2f Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Tue, 23 Jan 2024 18:09:20 +0200 Subject: [PATCH 0128/1405] phy: qcom-qmp-usb: fix serdes init sequence for IPQ6018 Commit 23fd679249df ("phy: qcom-qmp: add USB3 PHY support for IPQ6018") noted that IPQ6018 init is identical to IPQ8074. Yet downstream uses separate serdes init sequence for IPQ6018. Since already existing IPQ9574 serdes init sequence is identical, just reuse it and fix failing USB3 mode in IPQ6018. Fixes: 23fd679249df ("phy: qcom-qmp: add USB3 PHY support for IPQ6018") Signed-off-by: Mantas Pucka Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1706026160-17520-3-git-send-email-mantas@8devices.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 05b4c0e678962..6621246e4ddf0 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1621,6 +1621,24 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { .rx = 0x1000, }; +static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { + .lanes = 1, + + .offsets = &qmp_usb_offsets_ipq8074, + + .serdes_tbl = ipq9574_usb3_serdes_tbl, + .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl), + .tx_tbl = msm8996_usb3_tx_tbl, + .tx_tbl_num = ARRAY_SIZE(msm8996_usb3_tx_tbl), + .rx_tbl = ipq8074_usb3_rx_tbl, + .rx_tbl_num = ARRAY_SIZE(ipq8074_usb3_rx_tbl), + .pcs_tbl = ipq8074_usb3_pcs_tbl, + .pcs_tbl_num = ARRAY_SIZE(ipq8074_usb3_pcs_tbl), + .vreg_list = qmp_phy_vreg_l, + .num_vregs = ARRAY_SIZE(qmp_phy_vreg_l), + .regs = qmp_v3_usb3phy_regs_layout, +}; + static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, @@ -2571,7 +2589,7 @@ static int qmp_usb_probe(struct platform_device *pdev) static const struct of_device_id qmp_usb_of_match_table[] = { { .compatible = "qcom,ipq6018-qmp-usb3-phy", - .data = &ipq8074_usb3phy_cfg, + .data = &ipq6018_usb3phy_cfg, }, { .compatible = "qcom,ipq8074-qmp-usb3-phy", .data = &ipq8074_usb3phy_cfg, -- GitLab From 249abaf3bf0dd07f5ddebbb2fe2e8f4d675f074e Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 5 Jan 2024 18:37:03 +0900 Subject: [PATCH 0129/1405] phy: renesas: rcar-gen3-usb2: Fix returning wrong error code Even if device_create_file() returns error code, rcar_gen3_phy_usb2_probe() will return zero because the "ret" is variable shadowing. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202312161021.gOLDl48K-lkp@intel.com/ Fixes: 441a681b8843 ("phy: rcar-gen3-usb2: fix implementation for runtime PM") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240105093703.3359949-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/phy/renesas/phy-rcar-gen3-usb2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index e53eace7c91e3..6387c0d34c551 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -673,8 +673,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) channel->irq = platform_get_irq_optional(pdev, 0); channel->dr_mode = rcar_gen3_get_dr_mode(dev->of_node); if (channel->dr_mode != USB_DR_MODE_UNKNOWN) { - int ret; - channel->is_otg_channel = true; channel->uses_otg_pins = !of_property_read_bool(dev->of_node, "renesas,no-otg-pins"); @@ -738,8 +736,6 @@ static int rcar_gen3_phy_usb2_probe(struct platform_device *pdev) ret = PTR_ERR(provider); goto error; } else if (channel->is_otg_channel) { - int ret; - ret = device_create_file(dev, &dev_attr_role); if (ret < 0) goto error; -- GitLab From 4e4a1183f281d95fbb6caf28d670775d13264beb Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 8 Jan 2024 21:51:40 +0100 Subject: [PATCH 0130/1405] phy: lan966x: Add missing serdes mux entry According to the datasheet(Table 3-2: Port configuration) the serdes 2 (SD2) can be configured to run QSGMII or SGMII mode. Already the QSGMII mode is supported in the serdes_muxes list but was missing the SGMII mode. In this mode the serdes is connected to the port 4. Therefore add this entry in the list. Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20240108205140.1701770-1-horatiu.vultur@microchip.com Signed-off-by: Vinod Koul --- drivers/phy/microchip/lan966x_serdes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/microchip/lan966x_serdes.c b/drivers/phy/microchip/lan966x_serdes.c index c1a41b6cd29b1..b5ac2b7995e71 100644 --- a/drivers/phy/microchip/lan966x_serdes.c +++ b/drivers/phy/microchip/lan966x_serdes.c @@ -96,6 +96,8 @@ static const struct serdes_mux lan966x_serdes_muxes[] = { SERDES_MUX_SGMII(SERDES6G(1), 3, HSIO_HW_CFG_SD6G_1_CFG, HSIO_HW_CFG_SD6G_1_CFG_SET(1)), + SERDES_MUX_SGMII(SERDES6G(2), 4, 0, 0), + SERDES_MUX_RGMII(RGMII(0), 2, HSIO_HW_CFG_RGMII_0_CFG | HSIO_HW_CFG_RGMII_ENA | HSIO_HW_CFG_GMII_ENA, -- GitLab From fcfc9f711d1e2fc7876ac12b1b16c509404b9625 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 24 Jan 2024 14:21:47 +0800 Subject: [PATCH 0131/1405] ALSA: hda/realtek - Add speaker pin verbtable for Dell dual speaker platform SSID 0x0c0d platform. It can't mute speaker when HP plugged. This patch add quirk to fill speaker pin verbtable. And disable speaker passthrough. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/38b82976a875451d833d514cee34ff6a@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 19f2eb26659d8..bfefe9ccbc6be 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -439,6 +439,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000); fallthrough; case 0x10ec0215: + case 0x10ec0285: + case 0x10ec0289: + alc_update_coef_idx(codec, 0x36, 1<<13, 0); + fallthrough; case 0x10ec0230: case 0x10ec0233: case 0x10ec0235: @@ -452,9 +456,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: - case 0x10ec0285: case 0x10ec0298: - case 0x10ec0289: case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; @@ -9732,6 +9734,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), -- GitLab From 7c4298534ce3c4b85099aba53442ec82ef17578b Mon Sep 17 00:00:00 2001 From: Jacob Siverskog Date: Wed, 24 Jan 2024 11:18:24 +0100 Subject: [PATCH 0132/1405] ALSA: usb-audio: fix typo fix typo in midi fallback log. Signed-off-by: Jacob Siverskog Fixes: ff49d1df79ae ("ALSA: usb-audio: USB MIDI 2.0 UMP support") Link: https://lore.kernel.org/r/20240124101827.35433-1-jacob@teenage.engineering Signed-off-by: Takashi Iwai --- sound/usb/midi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/midi2.c b/sound/usb/midi2.c index 1ec177fe284ed..820d3e4b672ab 100644 --- a/sound/usb/midi2.c +++ b/sound/usb/midi2.c @@ -1085,7 +1085,7 @@ int snd_usb_midi_v2_create(struct snd_usb_audio *chip, } if ((quirk && quirk->type != QUIRK_MIDI_STANDARD_INTERFACE) || iface->num_altsetting < 2) { - usb_audio_info(chip, "Quirk or no altest; falling back to MIDI 1.0\n"); + usb_audio_info(chip, "Quirk or no altset; falling back to MIDI 1.0\n"); goto fallback_to_midi1; } hostif = &iface->altsetting[1]; -- GitLab From d62ccb59afcd94e8d301433974672b156392289d Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 24 Jan 2024 20:08:34 +0800 Subject: [PATCH 0133/1405] ALSA: virtio: remove duplicate check if queue is broken virtqueue_enable_cb() will call virtqueue_poll() which will check if queue is broken at beginning, so remove the virtqueue_is_broken() call Signed-off-by: Li RongQing Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240124120834.49410-1-lirongqing@baidu.com Signed-off-by: Takashi Iwai --- sound/virtio/virtio_card.c | 2 -- sound/virtio/virtio_ctl_msg.c | 2 -- sound/virtio/virtio_pcm_msg.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index e2847c040f750..b158c3cb8e5f5 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -91,8 +91,6 @@ static void virtsnd_event_notify_cb(struct virtqueue *vqueue) virtsnd_event_dispatch(snd, event); virtsnd_event_send(vqueue, event, true, GFP_ATOMIC); } - if (unlikely(virtqueue_is_broken(vqueue))) - break; } while (!virtqueue_enable_cb(vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/sound/virtio/virtio_ctl_msg.c b/sound/virtio/virtio_ctl_msg.c index 18dc5aca2e0c5..9dabea01277f8 100644 --- a/sound/virtio/virtio_ctl_msg.c +++ b/sound/virtio/virtio_ctl_msg.c @@ -303,8 +303,6 @@ void virtsnd_ctl_notify_cb(struct virtqueue *vqueue) virtqueue_disable_cb(vqueue); while ((msg = virtqueue_get_buf(vqueue, &length))) virtsnd_ctl_msg_complete(msg); - if (unlikely(virtqueue_is_broken(vqueue))) - break; } while (!virtqueue_enable_cb(vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c index 542446c4c7ba8..8c32efaf4c529 100644 --- a/sound/virtio/virtio_pcm_msg.c +++ b/sound/virtio/virtio_pcm_msg.c @@ -358,8 +358,6 @@ static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue) virtqueue_disable_cb(queue->vqueue); while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes))) virtsnd_pcm_msg_complete(msg, written_bytes); - if (unlikely(virtqueue_is_broken(queue->vqueue))) - break; } while (!virtqueue_enable_cb(queue->vqueue)); spin_unlock_irqrestore(&queue->lock, flags); } -- GitLab From d915a6850e27efb383cd4400caadfe47792623df Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Wed, 24 Jan 2024 16:02:39 +0300 Subject: [PATCH 0134/1405] ALSA: usb-audio: Add delay quirk for MOTU M Series 2nd revision Audio control requests that sets sampling frequency sometimes fail on this card. Adding delay between control messages eliminates that problem. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217601 Cc: Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240124130239.358298-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 43c14a81a1e2f..be0ce1fc4f26c 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2075,6 +2075,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x0763, 0x2031, /* M-Audio Fast Track C600 */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x07fd, 0x000b, /* MOTU M Series 2nd hardware revision */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x08bb, 0x2702, /* LineX FM Transmitter */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x0951, 0x16ad, /* Kingston HyperX */ -- GitLab From 92b0b0ff0ba32e7b3f1e789cc96df1359db712ef Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 23 Jan 2024 14:13:40 -0800 Subject: [PATCH 0135/1405] nvme: add module description to stop warnings Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-core.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-fabrics.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-rdma.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-fc.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/host/nvme-tcp.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/fabrics.c | 1 + drivers/nvme/host/fc.c | 1 + drivers/nvme/host/pci.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/host/tcp.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 85ab0fcf9e886..0810be0d1154c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4851,5 +4851,6 @@ static void __exit nvme_core_exit(void) MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host core framework"); module_init(nvme_core_init); module_exit(nvme_core_exit); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index b5752a77ad989..373ed08e6b920 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1488,6 +1488,7 @@ static void __exit nvmf_exit(void) } MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("NVMe host fabrics library"); module_init(nvmf_init); module_exit(nvmf_exit); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 16847a316421f..e2308119f8f0b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -4004,4 +4004,5 @@ static void __exit nvme_fc_exit_module(void) module_init(nvme_fc_init_module); module_exit(nvme_fc_exit_module); +MODULE_DESCRIPTION("NVMe host FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c1d6357ec98a0..25eb727795417 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3543,5 +3543,6 @@ static void __exit nvme_exit(void) MODULE_AUTHOR("Matthew Wilcox "); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("NVMe host PCIe transport driver"); module_init(nvme_init); module_exit(nvme_exit); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 11dde0d830442..0f48ead986ec5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2400,4 +2400,5 @@ static void __exit nvme_rdma_cleanup_module(void) module_init(nvme_rdma_init_module); module_exit(nvme_rdma_cleanup_module); +MODULE_DESCRIPTION("NVMe host RDMA transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d058d990532bf..4393cf244025e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2826,4 +2826,5 @@ static void __exit nvme_tcp_cleanup_module(void) module_init(nvme_tcp_init_module); module_exit(nvme_tcp_cleanup_module); +MODULE_DESCRIPTION("NVMe host TCP transport driver"); MODULE_LICENSE("GPL v2"); -- GitLab From 41951f83ef9044e906e11f5ea7db35a30dc9f581 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 23 Jan 2024 14:13:41 -0800 Subject: [PATCH 0136/1405] nvmet: add module description to stop warnings Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvme-loop.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-rdma.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-fc.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvme-fcloop.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/target/nvmet-tcp.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fc.c | 1 + drivers/nvme/target/fcloop.c | 1 + drivers/nvme/target/loop.c | 1 + drivers/nvme/target/rdma.c | 1 + drivers/nvme/target/tcp.c | 1 + 6 files changed, 6 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index fa35e65915f0c..8658e9c08534d 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1705,4 +1705,5 @@ static void __exit nvmet_exit(void) module_init(nvmet_init); module_exit(nvmet_exit); +MODULE_DESCRIPTION("NVMe target core framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index bda7a3009e851..05e6a755b3306 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2945,4 +2945,5 @@ static void __exit nvmet_fc_exit_module(void) module_init(nvmet_fc_init_module); module_exit(nvmet_fc_exit_module); +MODULE_DESCRIPTION("NVMe target FC transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index ead349af30f1e..c1faeb1e9e556 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1650,4 +1650,5 @@ static void __exit fcloop_exit(void) module_init(fcloop_init); module_exit(fcloop_exit); +MODULE_DESCRIPTION("NVMe target FC loop transport driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9cb434c580751..2b2e0d00af85e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -688,5 +688,6 @@ static void __exit nvme_loop_cleanup_module(void) module_init(nvme_loop_init_module); module_exit(nvme_loop_cleanup_module); +MODULE_DESCRIPTION("NVMe target loop transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 667f9c04f35d5..3a0f2c170f4c1 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -2104,5 +2104,6 @@ static void __exit nvmet_rdma_exit(void) module_init(nvmet_rdma_init); module_exit(nvmet_rdma_exit); +MODULE_DESCRIPTION("NVMe target RDMA transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */ diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 6a1e6bb80062d..da0469978d6f4 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2221,5 +2221,6 @@ static void __exit nvmet_tcp_exit(void) module_init(nvmet_tcp_init); module_exit(nvmet_tcp_exit); +MODULE_DESCRIPTION("NVMe target TCP transport driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ -- GitLab From 7822baa844a87cbb93308c1032c3d47d4079bb8a Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 24 Jan 2024 15:15:24 +0000 Subject: [PATCH 0137/1405] ALSA: usb-audio: add quirk for RODE NT-USB+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RODE NT-USB+ is marketed as a professional usb microphone, however the usb audio interface is a mess: [ 1.130977] usb 1-5: new full-speed USB device number 2 using xhci_hcd [ 1.503906] usb 1-5: config 1 has an invalid interface number: 5 but max is 4 [ 1.503912] usb 1-5: config 1 has no interface number 4 [ 1.519689] usb 1-5: New USB device found, idVendor=19f7, idProduct=0035, bcdDevice= 1.09 [ 1.519695] usb 1-5: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 1.519697] usb 1-5: Product: RØDE NT-USB+ [ 1.519699] usb 1-5: Manufacturer: RØDE [ 1.519700] usb 1-5: SerialNumber: 1D773A1A [ 8.327495] usb 1-5: 1:1: cannot get freq at ep 0x82 [ 8.344500] usb 1-5: 1:2: cannot get freq at ep 0x82 [ 8.365499] usb 1-5: 2:1: cannot get freq at ep 0x2 Add QUIRK_FLAG_GET_SAMPLE_RATE to work around the broken sample rate get. I have asked Rode support to fix it, but they show no interest. Signed-off-by: Sean Young Cc: Link: https://lore.kernel.org/r/20240124151524.23314-1-sean@mess.org Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index be0ce1fc4f26c..591c3a3546962 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2183,6 +2183,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- GitLab From 15ade5bfa5abf0e02249239db91d5438fa796d18 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Wed, 24 Jan 2024 12:16:27 +0000 Subject: [PATCH 0138/1405] nvme-rdma: Fix transfer length when write_generate/read_verify are 0 When the block layer doesn't generate/verify metadata, the SG length is smaller than the transfer length. This is because the SG length doesn't include the metadata length that is added by the HW on the wire. The target failes those commands with "Data SGL Length Invalid" by comparing the transfer length and the SG length. Fix it by adding the metadata length to the transfer length when there is no metadata SGL. The bug reproduces when setting read_verify/write_generate configs to 0 at the child multipath device or at the primary device when NVMe multipath is disabled. Note that setting those configs to 0 on the multipath device (ns_head) doesn't have any impact on the I/Os. Fixes: 5ec5d3bddc6b ("nvme-rdma: add metadata/T10-PI support") Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0f48ead986ec5..3f393ee202811 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1410,6 +1410,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, struct nvme_ns *ns = rq->q->queuedata; struct bio *bio = rq->bio; struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); + u32 xfer_len; int nr; req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); @@ -1422,8 +1424,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, if (unlikely(nr)) goto mr_put; - nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, - req->mr->sig_attrs, ns->head->pi_type); + nvme_rdma_set_sig_attrs(bi, c, req->mr->sig_attrs, ns->head->pi_type); nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); @@ -1441,7 +1442,11 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, IB_ACCESS_REMOTE_WRITE; sg->addr = cpu_to_le64(req->mr->iova); - put_unaligned_le24(req->mr->length, sg->length); + xfer_len = req->mr->length; + /* Check if PI is added by the HW */ + if (!pi_count) + xfer_len += (xfer_len >> bi->interval_exp) * ns->head->pi_size; + put_unaligned_le24(xfer_len, sg->length); put_unaligned_le32(req->mr->rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; -- GitLab From 668abe6dc7b61941fa5c724c06797efb0b87f070 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Jan 2024 16:53:07 +0100 Subject: [PATCH 0139/1405] ALSA: usb-audio: Sort quirk table entries The quirk table entries should be put in the USB ID order, but some entries have been put in random places. Re-sort them. Fixes: bf990c102319 ("ALSA: usb-audio: add quirk to fix Hamedal C20 disconnect issue") Fixes: fd28941cff1c ("ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless") Fixes: dfd5fe19db7d ("ALSA: usb-audio: Add FIXED_RATE quirk for JBL Quantum610 Wireless") Fixes: 4a63e68a2951 ("ALSA: usb-audio: Fix microphone sound on Nexigo webcam.") Fixes: 7822baa844a8 ("ALSA: usb-audio: add quirk for RODE NT-USB+") Fixes: 4fb7c24f69c4 ("ALSA: usb-audio: Add quirk for Fiero SC-01") Fixes: 2307a0e1ca0b ("ALSA: usb-audio: Add quirk for Fiero SC-01 (fw v1.0.0)") Link: https://lore.kernel.org/r/20240124155307.16996-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 591c3a3546962..09712e61c606e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2037,6 +2037,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x04e8, 0xa051, /* Samsung USBC Headset (AKG) */ QUIRK_FLAG_SKIP_CLOCK_SELECTOR | QUIRK_FLAG_CTL_MSG_DELAY_5M), + DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ + QUIRK_FLAG_IFACE_SKIP_CLOSE), DEVICE_FLG(0x054c, 0x0b8c, /* Sony WALKMAN NW-A45 DAC */ QUIRK_FLAG_SET_IFACE_FIRST), DEVICE_FLG(0x0556, 0x0014, /* Phoenix Audio TMX320VC */ @@ -2083,8 +2085,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0b0e, 0x0349, /* Jabra 550a */ QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ + QUIRK_FLAG_FIXED_RATE), + DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ + QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ @@ -2117,6 +2125,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY), DEVICE_FLG(0x1901, 0x0191, /* GE B850V3 CP2114 audio interface */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ + QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ + QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x2040, 0x7200, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x2040, 0x7201, /* Hauppauge HVR-950Q-MXL */ @@ -2159,6 +2171,12 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ + QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ @@ -2167,24 +2185,6 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */ QUIRK_FLAG_ALIGN_TRANSFER), - DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ - QUIRK_FLAG_GENERIC_IMPLICIT_FB), - DEVICE_FLG(0x0525, 0xa4ad, /* Hamedal C20 usb camero */ - QUIRK_FLAG_IFACE_SKIP_CLOSE), - DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ - QUIRK_FLAG_FIXED_RATE), - DEVICE_FLG(0x1bcf, 0x2283, /* NexiGo N930AF FHD Webcam */ - QUIRK_FLAG_GET_SAMPLE_RATE), - DEVICE_FLG(0x19f7, 0x0035, /* RODE NT-USB+ */ - QUIRK_FLAG_GET_SAMPLE_RATE), /* Vendor matches */ VENDOR_FLG(0x045e, /* MS Lifecam */ -- GitLab From 1793ce9f205efe07ca2992ef129b86dab2c329f5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Dec 2023 22:08:23 -0800 Subject: [PATCH 0140/1405] drm/msm/dpu: fix kernel-doc warnings Correct all kernel-doc warnings in dpu_encoder.c and dpu_rm.c: dpu_encoder.c:212: warning: Excess struct member 'crtc_kickoff_cb' description in 'dpu_encoder_virt' dpu_encoder.c:212: warning: Excess struct member 'crtc_kickoff_cb_data' description in 'dpu_encoder_virt' dpu_encoder.c:212: warning: Excess struct member 'debugfs_root' description in 'dpu_encoder_virt' dpu_rm.c:35: warning: Excess struct member 'hw_res' description in 'dpu_rm_requirements' dpu_rm.c:208: warning: No description found for return value of '_dpu_rm_get_lm_peer' Signed-off-by: Randy Dunlap Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: linux-arm-msm@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: freedreno@lists.freedesktop.org Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: Jonathan Corbet Cc: Vegard Nossum Reviewed-by: Paloma Arellano Reviewed-by: Dmitry Baryshkov Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312170641.5exlvQQx-lkp@intel.com/ Fixes: 62d35629da80 ("drm/msm/dpu: move encoder status to standard encoder debugfs dir") Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Patchwork: https://patchwork.freedesktop.org/patch/572962/ Link: https://lore.kernel.org/r/20231231060823.1934-1-rdunlap@infradead.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ---- drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 83380bc92a00a..f2b82ca5efb39 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -144,10 +144,6 @@ enum dpu_enc_rc_states { * to track crtc in the disable() hook which is called * _after_ encoder_mask is cleared. * @connector: If a mode is set, cached pointer to the active connector - * @crtc_kickoff_cb: Callback into CRTC that will flush & start - * all CTL paths - * @crtc_kickoff_cb_data: Opaque user data given to crtc_kickoff_cb - * @debugfs_root: Debug file system root file node * @enc_lock: Lock around physical encoder * create/destroy/enable/disable * @frame_busy_mask: Bitmask tracking which phys_enc we are still diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index b58a9c2ae326c..724537ab776df 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -29,7 +29,6 @@ static inline bool reserved_by_other(uint32_t *res_map, int idx, /** * struct dpu_rm_requirements - Reservation requirements parameter bundle * @topology: selected topology for the display - * @hw_res: Hardware resources required as reported by the encoders */ struct dpu_rm_requirements { struct msm_display_topology topology; @@ -204,6 +203,8 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top) * _dpu_rm_get_lm_peer - get the id of a mixer which is a peer of the primary * @rm: dpu resource manager handle * @primary_idx: index of primary mixer in rm->mixer_blks[] + * + * Returns: lm peer mixed id on success or %-EINVAL on error */ static int _dpu_rm_get_lm_peer(struct dpu_rm *rm, int primary_idx) { -- GitLab From 77e8aad5519e04f6c1e132aaec1c5f8faf41844f Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 10 Jan 2024 12:18:51 -0800 Subject: [PATCH 0141/1405] drm/msms/dp: fixed link clock divider bits be over written in BPC unknown case Since the value of DP_TEST_BIT_DEPTH_8 is already left shifted, in the BPC unknown case, the additional shift causes spill over to the other bits of the [DP_CONFIGURATION_CTRL] register. Fix this by changing the return value of dp_link_get_test_bits_depth() in the BPC unknown case to (DP_TEST_BIT_DEPTH_8 >> DP_TEST_BIT_DEPTH_SHIFT). Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/573989/ Link: https://lore.kernel.org/r/1704917931-30133-1-git-send-email-quic_khsieh@quicinc.com [quic_abhinavk@quicinc.com: fix minor checkpatch warning to align with opening braces] Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_ctrl.c | 5 ----- drivers/gpu/drm/msm/dp/dp_link.c | 10 +++++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 77a8d9366ed7b..fb588fde298a2 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -135,11 +135,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) tbd = dp_link_get_test_bits_depth(ctrl->link, ctrl->panel->dp_mode.bpp); - if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { - pr_debug("BIT_DEPTH not set. Configure default\n"); - tbd = DP_TEST_BIT_DEPTH_8; - } - config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; /* Num of Lanes */ diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index 98427d45e9a7e..a0015b9e79eb7 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -1179,6 +1179,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) { u32 tbd; + struct dp_link_private *link; + + link = container_of(dp_link, struct dp_link_private, dp_link); /* * Few simplistic rules and assumptions made here: @@ -1196,12 +1199,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) tbd = DP_TEST_BIT_DEPTH_10; break; default: - tbd = DP_TEST_BIT_DEPTH_UNKNOWN; + drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", + bpp); + tbd = DP_TEST_BIT_DEPTH_8; break; } - if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) - tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); return tbd; } -- GitLab From fcccdafd91f8bdde568b86ff70848cf83f029add Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 17 Jan 2024 13:13:30 -0800 Subject: [PATCH 0142/1405] drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA case MSA MISC0 bit 1 to 7 contains Colorimetry Indicator Field. dp_link_get_colorimetry_config() returns wrong colorimetry value in the DP_TEST_DYNAMIC_RANGE_CEA case in the current implementation. Hence fix this problem by having dp_link_get_colorimetry_config() return defined CEA RGB colorimetry value in the case of DP_TEST_DYNAMIC_RANGE_CEA. Changes in V2: -- drop retrieving colorimetry from colorspace -- drop dr = link->dp_link.test_video.test_dyn_range assignment Changes in V3: -- move defined MISCr0a Colorimetry vale to dp_reg.h -- rewording commit title -- rewording commit text to more precise describe this patch Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/574888/ Link: https://lore.kernel.org/r/1705526010-597-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/dp/dp_link.c | 12 +++++++----- drivers/gpu/drm/msm/dp/dp_reg.h | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c index a0015b9e79eb7..49dfac1fd1ef2 100644 --- a/drivers/gpu/drm/msm/dp/dp_link.c +++ b/drivers/gpu/drm/msm/dp/dp_link.c @@ -7,6 +7,7 @@ #include +#include "dp_reg.h" #include "dp_link.h" #include "dp_panel.h" @@ -1082,7 +1083,7 @@ int dp_link_process_request(struct dp_link *dp_link) int dp_link_get_colorimetry_config(struct dp_link *dp_link) { - u32 cc; + u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; struct dp_link_private *link; if (!dp_link) { @@ -1096,10 +1097,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) * Unless a video pattern CTS test is ongoing, use RGB_VESA * Only RGB_VESA and RGB_CEA supported for now */ - if (dp_link_is_video_pattern_requested(link)) - cc = link->dp_link.test_video.test_dyn_range; - else - cc = DP_TEST_DYNAMIC_RANGE_VESA; + if (dp_link_is_video_pattern_requested(link)) { + if (link->dp_link.test_video.test_dyn_range & + DP_TEST_DYNAMIC_RANGE_CEA) + cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; + } return cc; } diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h index ea85a691e72b5..78785ed4b40c4 100644 --- a/drivers/gpu/drm/msm/dp/dp_reg.h +++ b/drivers/gpu/drm/msm/dp/dp_reg.h @@ -143,6 +143,9 @@ #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) +#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) +#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) + #define REG_DP_VALID_BOUNDARY (0x00000030) #define REG_DP_VALID_BOUNDARY_2 (0x00000034) -- GitLab From 7f3d03c48b1eb6bc45ab20ca98b8b11be25f9f52 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Wed, 17 Jan 2024 11:41:09 -0800 Subject: [PATCH 0143/1405] drm/msm/dpu: check for valid hw_pp in dpu_encoder_helper_phys_cleanup The commit 8b45a26f2ba9 ("drm/msm/dpu: reserve cdm blocks for writeback in case of YUV output") introduced a smatch warning about another conditional block in dpu_encoder_helper_phys_cleanup() which had assumed hw_pp will always be valid which may not necessarily be true. Lets fix the other conditional block by making sure hw_pp is valid before dereferencing it. Reported-by: Dan Carpenter Fixes: ae4d721ce100 ("drm/msm/dpu: add an API to reset the encoder related hw blocks") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/574878/ Link: https://lore.kernel.org/r/20240117194109.21609-1-quic_abhinavk@quicinc.com Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index f2b82ca5efb39..6a4b489d44e51 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2068,7 +2068,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } /* reset the merge 3D HW block */ - if (phys_enc->hw_pp->merge_3d) { + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, BLEND_3D_NONE); if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) @@ -2099,7 +2099,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) if (phys_enc->hw_wb) intf_cfg.wb = phys_enc->hw_wb->idx; - if (phys_enc->hw_pp->merge_3d) + if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; if (ctl->ops.reset_intf_cfg) -- GitLab From 97cf5d53b4812dcb52c13fda700dad5aa8d3446c Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Wed, 24 Jan 2024 11:19:45 +0800 Subject: [PATCH 0144/1405] erofs: get rid of unneeded GFP_NOFS Clean up some leftovers since there is no way for EROFS to be called again from a reclaim context. Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240124031945.130782-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 2 +- fs/erofs/inode.c | 2 +- fs/erofs/utils.c | 2 +- fs/erofs/zdata.c | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index bc12030393b24..5ff90026fd43f 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -459,7 +459,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &erofs_fscache_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); inode->i_blkbits = EROFS_SB(sb)->blkszbits; inode->i_private = ctx; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 3d616dea55dc3..36e638e8b53a3 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, } else { const unsigned int gotten = sb->s_blocksize - *ofs; - copied = kmalloc(vi->inode_isize, GFP_NOFS); + copied = kmalloc(vi->inode_isize, GFP_KERNEL); if (!copied) { err = -ENOMEM; goto err_out; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 5dea308764b45..e146d09151af4 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, repeat: xa_lock(&sbi->managed_pslots); pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, - NULL, grp, GFP_NOFS); + NULL, grp, GFP_KERNEL); if (pre) { if (xa_is_err(pre)) { pre = ERR_PTR(xa_err(pre)); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 692c0c39be638..583c062cd0e42 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -230,7 +230,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, struct page *nextpage = *candidate_bvpage; if (!nextpage) { - nextpage = erofs_allocpage(pagepool, GFP_NOFS); + nextpage = erofs_allocpage(pagepool, GFP_KERNEL); if (!nextpage) return -ENOMEM; set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); @@ -302,7 +302,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) if (nrpages > pcs->maxpages) continue; - pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); if (!pcl) return ERR_PTR(-ENOMEM); pcl->pclustersize = size; @@ -694,7 +694,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio, DBG_BUGON(stop > folio_size(folio) || stop < length); if (offset == 0 && stop == folio_size(folio)) - while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) + while (!z_erofs_cache_release_folio(folio, 0)) cond_resched(); } @@ -713,7 +713,7 @@ int erofs_init_managed_cache(struct super_block *sb) set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &z_erofs_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); EROFS_SB(sb)->managed_cache = inode; return 0; } -- GitLab From a3bdcdd022c68942a774e8e63424cc11c85aab78 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 25 Jan 2024 14:32:26 +0800 Subject: [PATCH 0145/1405] HID: hidraw: fix a problem of memory leak in hidraw_release() 'struct hidraw_list' is a circular queue whose head can be smaller than tail. Using 'list->tail != list->head' to release all memory that should be released. Fixes: a5623a203cff ("HID: hidraw: fix memory leak in hidraw_release()") Signed-off-by: Su Hui Reviewed-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hidraw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 13c8dd8cd3506..2bc762d31ac70 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); -- GitLab From 809aa64ebff51eb170ee31a95f83b2d21efa32e2 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Fri, 12 Jan 2024 16:55:23 +0800 Subject: [PATCH 0146/1405] IB/hfi1: Fix a memleak in init_credit_return When dma_alloc_coherent fails to allocate dd->cr_base[i].va, init_credit_return should deallocate dd->cr_base and dd->cr_base[i] that allocated before. Or those resources would be never freed and a memleak is triggered. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Zhipeng Lu Link: https://lore.kernel.org/r/20240112085523.3731720-1-alexious@zju.edu.cn Acked-by: Dennis Dalessandro Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/pio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 68c621ff59d03..5a91cbda4aee6 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2086,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2094,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) -- GitLab From 282fd66e2ef6e5d72b8fcd77efb2b282d2569464 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:33 -0800 Subject: [PATCH 0147/1405] RDMA/bnxt_re: Avoid creating fence MR for newer adapters Limit the usage of fence MR to adapters older than Gen P5 products. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai Signed-off-by: Bhargava Chenna Marreddy Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 824349659d69d..e1ea49263145e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -401,6 +401,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd) struct bnxt_re_fence_data *fence = &pd->fence; struct ib_mr *ib_mr = &fence->mr->ib_mr; struct bnxt_qplib_swqe *wqe = &fence->bind_wqe; + struct bnxt_re_dev *rdev = pd->rdev; + + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; memset(wqe, 0, sizeof(*wqe)); wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW; @@ -455,6 +459,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd) struct device *dev = &rdev->en_dev->pdev->dev; struct bnxt_re_mr *mr = fence->mr; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; + if (fence->mw) { bnxt_re_dealloc_mw(fence->mw); fence->mw = NULL; @@ -486,6 +493,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct ib_mw *mw; int rc; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, DMA_BIDIRECTIONAL); rc = dma_mapping_error(dev, dma_addr); -- GitLab From 8fcbf0a55f71be7e562f10222abc9a34148189d0 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:34 -0800 Subject: [PATCH 0148/1405] RDMA/bnxt_re: Remove a redundant check inside bnxt_re_vf_res_config After the cited commit, there is no possibility that this check can return true. Remove it. Fixes: a43c26fa2e6c ("RDMA/bnxt_re: Remove the sriov config callback") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f022c922fae51..54b4d2f3a5d88 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -280,9 +280,6 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) { - - if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) - return; rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); -- GitLab From 8eaca6b5997bd8fd7039f2693e4ecf112823c816 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:35 -0800 Subject: [PATCH 0149/1405] RDMA/bnxt_re: Fix unconditional fence for newer adapters Older adapters required an unconditional fence for non-wire memory operations. Newer adapters doesn't require this and therefore, disabling the unconditional fence. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 28 ++++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e1ea49263145e..fdb4fde1acbe8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2566,11 +2566,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; - /* Need unconditional fence for local invalidate - * opcode to work as expected. - */ - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_SOLICITED) @@ -2593,12 +2588,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.levels = qplib_frpl->hwq.level; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; - /* Need unconditional fence for reg_mr - * opcode to function as expected. - */ - - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; @@ -2729,6 +2718,18 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, return rc; } +static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe) +{ + /* Need unconditional fence for non-wire memory opcode + * to work as expected. + */ + if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV || + wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW) + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; +} + int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { @@ -2808,8 +2809,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, rc = -EINVAL; goto bad; } - if (!rc) + if (!rc) { + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_re_legacy_set_uc_fence(&wqe); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); + } bad: if (rc) { ibdev_err(&qp->rdev->ibdev, -- GitLab From 3687b450c5f32e80f179ce4b09e0454da1449eac Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:36 -0800 Subject: [PATCH 0150/1405] RDMA/bnxt_re: Return error for SRQ resize SRQ resize is not supported in the driver. But driver is not returning error from bnxt_re_modify_srq() for SRQ resize. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index fdb4fde1acbe8..ce9c5bae83bf1 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1827,7 +1827,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1842,13 +1842,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) -- GitLab From 80dde187f734cf9ccf988d5c2ef1a46b990660fd Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 22 Jan 2024 20:54:37 -0800 Subject: [PATCH 0151/1405] RDMA/bnxt_re: Add a missing check in bnxt_qplib_query_srq Before populating the response, driver has to check the status of HWRM command. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1705985677-15551-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c98e04fe2ddd4..439d0c7c5d0ca 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -744,7 +744,8 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); - srq->threshold = le16_to_cpu(sb->srq_limit); + if (!rc) + srq->threshold = le16_to_cpu(sb->srq_limit); dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); -- GitLab From 116099ed345c932a8ae4a0d884a8f6cc54fd5fed Mon Sep 17 00:00:00 2001 From: Hu Yadi Date: Tue, 23 Jan 2024 14:26:21 +0800 Subject: [PATCH 0152/1405] selftests/landlock: Fix net_test build with old libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One issue comes up while building selftest/landlock/net_test on my side (gcc 7.3/glibc-2.28/kernel-4.19). net_test.c: In function ‘set_service’: net_test.c:91:45: warning: implicit declaration of function ‘gettid’; [-Wimplicit-function-declaration] "_selftests-landlock-net-tid%d-index%d", gettid(), ^~~~~~ getgid net_test.c:(.text+0x4e0): undefined reference to `gettid' Signed-off-by: Hu Yadi Suggested-by: Jiao Reviewed-by: Berlin Fixes: a549d055a22e ("selftests/landlock: Add network tests") Link: https://lore.kernel.org/r/20240123062621.25082-1-hu.yadi@h3c.com [mic: Cosmetic fixes] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/net_test.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index ea5f727dd2577..efcde123af1f2 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "common.h" @@ -54,6 +55,11 @@ struct service_fixture { }; }; +static pid_t sys_gettid(void) +{ + return syscall(__NR_gettid); +} + static int set_service(struct service_fixture *const srv, const struct protocol_variant prot, const unsigned short index) @@ -88,7 +94,7 @@ static int set_service(struct service_fixture *const srv, case AF_UNIX: srv->unix_addr.sun_family = prot.domain; sprintf(srv->unix_addr.sun_path, - "_selftests-landlock-net-tid%d-index%d", gettid(), + "_selftests-landlock-net-tid%d-index%d", sys_gettid(), index); srv->unix_addr_len = SUN_LEN(&srv->unix_addr); srv->unix_addr.sun_path[0] = '\0'; -- GitLab From 40b7835e74e0383be308d528c5e0e41b3bf72ade Mon Sep 17 00:00:00 2001 From: Hu Yadi Date: Wed, 24 Jan 2024 10:29:08 +0800 Subject: [PATCH 0153/1405] selftests/landlock: Fix fs_test build with old libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One issue comes up while building selftest/landlock/fs_test on my side (gcc 7.3/glibc-2.28/kernel-4.19). gcc -Wall -O2 -isystem fs_test.c -lcap -o selftests/landlock/fs_test fs_test.c:4575:9: error: initializer element is not constant .mnt = mnt_tmp, ^~~~~~~ Signed-off-by: Hu Yadi Suggested-by: Jiao Reviewed-by: Berlin Link: https://lore.kernel.org/r/20240124022908.42100-1-hu.yadi@h3c.com Fixes: 04f9070e99a4 ("selftests/landlock: Add tests for pseudo filesystems") [mic: Factor out mount's data string and make mnt_tmp static] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/fs_test.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 50818904397c5..2d6d9b43d958c 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -241,9 +241,11 @@ struct mnt_opt { const char *const data; }; -const struct mnt_opt mnt_tmp = { +#define MNT_TMP_DATA "size=4m,mode=700" + +static const struct mnt_opt mnt_tmp = { .type = "tmpfs", - .data = "size=4m,mode=700", + .data = MNT_TMP_DATA, }; static int mount_opt(const struct mnt_opt *const mnt, const char *const target) @@ -4632,7 +4634,10 @@ FIXTURE_VARIANT(layout3_fs) /* clang-format off */ FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) { /* clang-format on */ - .mnt = mnt_tmp, + .mnt = { + .type = "tmpfs", + .data = MNT_TMP_DATA, + }, .file_path = file1_s1d1, }; -- GitLab From fc4657971be31ae679e2bbeee2fb8e93a7a063eb Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 17 Jan 2024 20:14:48 +0100 Subject: [PATCH 0154/1405] arm64: dts: rockchip: mark system power controller on rk3588-evb1 Mark the primary PMIC as system-power-controller, so that the system properly shuts down on poweroff. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20240117191555.86138-1-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts index ac7c677b0fb9c..de30c2632b8e5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -448,6 +448,7 @@ <&rk806_dvs2_null>, <&rk806_dvs3_null>; pinctrl-names = "default"; spi-max-frequency = <1000000>; + system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; -- GitLab From 8d35217149daa33358c284aca6a56d5ab92cfc6c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 15 Dec 2023 03:32:22 +0200 Subject: [PATCH 0155/1405] drm/msm/mdss: specify cfg bandwidth for SDM670 Lower the requested CFG bus bandwidth for the SDM670 platform. The default value is 153600 kBps, which is twice as big as required by the platform according to the vendor kernel. Fixes: a55c8ff252d3 ("drm/msm/mdss: Handle the reg bus ICC path") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Richard Acayan Patchwork: https://patchwork.freedesktop.org/patch/572182/ Link: https://lore.kernel.org/r/20231215013222.827975-1-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar --- drivers/gpu/drm/msm/msm_mdss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index 455b2e3a0cdd4..35423d10aafa9 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -562,6 +562,7 @@ static const struct msm_mdss_data sdm670_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .highest_bank_bit = 1, + .reg_bus_bw = 76800, }; static const struct msm_mdss_data sdm845_data = { -- GitLab From b64787840080bdbd048bb9c68222ad17236cbd7e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 11:25:50 -0800 Subject: [PATCH 0156/1405] selftests: tcp_ao: add a config file Still a bit unclear whether each directory should have its own config file, but assuming they should lets add one for tcp_ao. The following tests still fail with this config in place: - rst_ipv4, - rst_ipv6, - bench-lookups_ipv6. other 21 pass. Fixes: d11301f65977 ("selftests/net: Add TCP-AO ICMPs accept test") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124192550.1865743-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/config | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/config diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 0000000000000..d3277a9de987c --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m -- GitLab From 98cb12eb52a780e682bea8372fdb2912c08132dd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:20 +0100 Subject: [PATCH 0157/1405] selftests: net: remove dependency on ebpf tests Several net tests requires an XDP program build under the ebpf directory, and error out if such program is not available. That makes running successful net test hard, let's duplicate into the net dir the [very small] program, re-using the existing rules to build it, and finally dropping the bogus dependency. Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/28e7af7c031557f691dc8045ee41dd549dd5e74c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 +++-- tools/testing/selftests/net/udpgro.sh | 4 ++-- tools/testing/selftests/net/udpgro_bench.sh | 4 ++-- tools/testing/selftests/net/udpgro_frglist.sh | 6 +++--- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- tools/testing/selftests/net/veth.sh | 4 ++-- tools/testing/selftests/net/xdp_dummy.c | 13 +++++++++++++ 7 files changed, 26 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/net/xdp_dummy.c diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 50818075e566e..304d8b852ef05 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -84,6 +84,7 @@ TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh @@ -104,7 +105,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf obj nat6to4.o +# Rules to generate bpf objs CLANG ?= clang SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -139,7 +140,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) +$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index af5dc57c8ce93..8802604148dda 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" # set global exit status, but never reset nonzero one. check_err() @@ -197,7 +197,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index cb664679b4342..7080eae5312b2 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -84,7 +84,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index dd47fa96f6b3e..e1ff645bd3d1c 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -85,12 +85,12 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi if [ ! -f nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" + echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add392..5fa8659ab13d6 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2d073595c6202..27574bbf2d638 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -218,7 +218,7 @@ while getopts "hs:" option; do done if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit 1 fi diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c new file mode 100644 index 0000000000000..d988b2e0cee84 --- /dev/null +++ b/tools/testing/selftests/net/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include +#include + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; -- GitLab From f5173fe3e13b2cbd25d0d73f40acd923d75add55 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:21 +0100 Subject: [PATCH 0158/1405] selftests: net: included needed helper in the install targets The blamed commit below introduce a dependency in some net self-tests towards a newly introduce helper script. Such script is currently not included into the TEST_PROGS_EXTENDED list and thus is not installed, causing failure for the relevant tests when executed from the install dir. Fix the issue updating the install targets. Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/076e8758e21ff2061cc9f81640e7858df775f0a9.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 304d8b852ef05..48c6f93b81498 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -55,6 +55,7 @@ TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh +TEST_PROGS_EXTENDED += net_helper.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite -- GitLab From 4acffb66630a0e4800880baa61a54ef18047ccd3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 24 Jan 2024 22:33:22 +0100 Subject: [PATCH 0159/1405] selftests: net: explicitly wait for listener ready The UDP GRO forwarding test still hard-code an arbitrary pause to wait for the UDP listener becoming ready in background. That causes sporadic failures depending on the host load. Replace the sleep with the existing helper waiting for the desired port being exposed. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/4d58900fb09cef42749cfcf2ad7f4b91a97d225c.1706131762.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 5fa8659ab13d6..d6b9c759043ca 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 @@ -119,7 +121,7 @@ run_test() { ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst local retc=$? wait $spid @@ -168,7 +170,7 @@ run_bench() { ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst local retc=$? wait $spid -- GitLab From 39b383d77961a544d896242051168b8129cf5be7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 Jan 2024 15:36:30 -0800 Subject: [PATCH 0160/1405] selftests: tcp_ao: set the timeout to 2 minutes The default timeout for tests is 45sec, bench-lookups_ipv6 seems to take around 50sec when running in a VM without HW acceleration. Give it a 2x margin and set the timeout to 120sec. Fixes: d1066c9c58d4 ("selftests/net: Add test/benchmark for removing MKTs") Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20240124233630.1977708-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/net/tcp_ao/settings diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 0000000000000..6091b45d226ba --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 -- GitLab From fc836129f708407502632107e58d48f54b1caf75 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 24 Jan 2024 14:13:44 +0800 Subject: [PATCH 0161/1405] selftests/net/lib: update busywait timeout value The busywait timeout value is a millisecond, not a second. So the current setting 2 is too small. On slow/busy host (or VMs) the current timeout can expire even on "correct" execution, causing random failures. Let's copy the WAIT_TIMEOUT from forwarding/lib.sh and set BUSYWAIT_TIMEOUT here. Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Signed-off-by: Hangbin Liu Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240124061344.1864484-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index dca5494438011..f9fe182dfbd44 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -4,6 +4,9 @@ ############################################################################## # Defines +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 # namespace list created by setup_ns @@ -48,7 +51,7 @@ cleanup_ns() for ns in "$@"; do ip netns delete "${ns}" &> /dev/null - if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 fi -- GitLab From 534326711000c318fe1523c77308450522baa499 Mon Sep 17 00:00:00 2001 From: Praveen Kaligineedi Date: Wed, 24 Jan 2024 08:10:25 -0800 Subject: [PATCH 0162/1405] gve: Fix skb truesize underestimation For a skb frag with a newly allocated copy page, the true size is incorrectly set to packet buffer size. It should be set to PAGE_SIZE instead. Fixes: 82fd151d38d9 ("gve: Reduce alloc and copy costs in the GQ rx path") Signed-off-by: Praveen Kaligineedi Link: https://lore.kernel.org/r/20240124161025.1819836-1-pkaligineedi@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_rx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 7a8dc5386ffff..76615d47e055a 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -356,7 +356,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, - u16 packet_buffer_size, u16 len, + unsigned int truesize, u16 len, struct gve_rx_ctx *ctx) { u32 offset = page_info->page_offset + page_info->pad; @@ -389,10 +389,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, if (skb != ctx->skb_head) { ctx->skb_head->len += len; ctx->skb_head->data_len += len; - ctx->skb_head->truesize += packet_buffer_size; + ctx->skb_head->truesize += truesize; } skb_add_rx_frag(skb, num_frags, page_info->page, - offset, len, packet_buffer_size); + offset, len, truesize); return ctx->skb_head; } @@ -486,7 +486,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, memcpy(alloc_page_info.page_address, src, page_info->pad + len); skb = gve_rx_add_frags(napi, &alloc_page_info, - rx->packet_buffer_size, + PAGE_SIZE, len, ctx); u64_stats_update_begin(&rx->statss); -- GitLab From cefa98e806fd4e2a5e2047457a11ae5f17b8f621 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:08 +0200 Subject: [PATCH 0163/1405] nfp: flower: add hardware offload check for post ct entry The nfp offload flow pay will not allocate a mask id when the out port is openvswitch internal port. This is because these flows are used to configure the pre_tun table and are never actually send to the firmware as an add-flow message. When a tc rule which action contains ct and the post ct entry's out port is openvswitch internal port, the merge offload flow pay with the wrong mask id of 0 will be send to the firmware. Actually, the nfp can not support hardware offload for this situation, so return EOPNOTSUPP. Fixes: bd0fe7f96a3c ("nfp: flower-ct: add zone table entry when handling pre/post_ct flows") CC: stable@vger.kernel.org # 5.14+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-2-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2967bab725056..726d8cdf0b9cf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1864,10 +1864,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { -- GitLab From 3a007b8009b5f8af021021b7a590a6da0dc4c6e0 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 24 Jan 2024 17:19:09 +0200 Subject: [PATCH 0164/1405] nfp: flower: fix hardware offload for the transfer layer port The nfp driver will merge the tp source port and tp destination port into one dword which the offset must be zero to do hardware offload. However, the mangle action for the tp source port and tp destination port is separated for tc ct action. Modify the mangle action for the FLOW_ACT_MANGLE_HDR_TYPE_TCP and FLOW_ACT_MANGLE_HDR_TYPE_UDP to satisfy the nfp driver offload check for the tp port. The mangle action provides a 4B value for source, and a 4B value for the destination, but only 2B of each contains the useful information. For offload the 2B of each is combined into a single 4B word. Since the incoming mask for the source is '0xFFFF' the shift-left will throw away the 0xFFFF part. When this gets combined together in the offload it will clear the destination field. Fix this by setting the lower bits back to 0xFFFF, effectively doing a rotate-left operation on the mask. Fixes: 5cee92c6f57a ("nfp: flower: support hw offload for ct nat action") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Hui Zhou Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20240124151909.31603-3-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../ethernet/netronome/nfp/flower/conntrack.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 726d8cdf0b9cf..15180538b80a1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: -- GitLab From cae1f1c36661f28c92a1db9113961a9ebd61dbaa Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 16:22:09 +0000 Subject: [PATCH 0165/1405] net: ethernet: mtk_eth_soc: set DMA coherent mask to get PPE working Set DMA coherent mask to 32-bit which makes PPE offloading engine start working on BPi-R4 which got 4 GiB of RAM. Fixes: 2d75891ebc09 ("net: ethernet: mtk_eth_soc: support 36-bit DMA addressing on MT7988") Suggested-by: Elad Yifee Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/97e90925368b405f0974b9b15f1b7377c4a329ad.1706113251.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a6e91573f8dae..de123350bd46b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev) } if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); + if (!err) + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "Wrong DMA config\n"); return -EINVAL; -- GitLab From ff63cc2e95065bea978d2db01f7e7356cca3d021 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:18:23 +0000 Subject: [PATCH 0166/1405] net: phy: mediatek-ge-soc: sync driver with MediaTek SDK Sync initialization and calibration routines with MediaTek's reference driver. Improves compliance and resolves link stability issues with CH340 IoT devices connected to MT798x built-in PHYs. Fixes: 98c485eaf509 ("net: phy: add driver for MediaTek SoC built-in GE PHYs") Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/f2195279c234c0f618946424b8236026126bc595.1706071311.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek-ge-soc.c | 147 ++++++++++++++++-------------- 1 file changed, 81 insertions(+), 66 deletions(-) diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 8a20d9889f105..0f3a1538a8b8e 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val, u16 reg, val; if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988) - bias = -2; + bias = -1; val = clamp_val(bias + tx_r50_cal_val, 0, 63); @@ -705,6 +705,11 @@ static int tx_vcm_cal_sw(struct phy_device *phydev, u8 rg_txreserve_x) static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ + __phy_write(phydev, 0x11, 0xc71); + __phy_write(phydev, 0x12, 0xc); + __phy_write(phydev, 0x10, 0x8fae); + /* EnabRandUpdTrig = 1 */ __phy_write(phydev, 0x11, 0x2f00); __phy_write(phydev, 0x12, 0xe); @@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x83aa); - /* TrFreeze = 0 */ + /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ + __phy_write(phydev, 0x11, 0x240); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x9680); + + /* TrFreeze = 0 (mt7988 default) */ __phy_write(phydev, 0x11, 0x0); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9686); + /* SSTrKp100 = 5 */ + /* SSTrKf100 = 6 */ + /* SSTrKp1000Mas = 5 */ + /* SSTrKf1000Mas = 6 */ /* SSTrKp1000Slv = 5 */ + /* SSTrKf1000Slv = 6 */ __phy_write(phydev, 0x11, 0xbaef); __phy_write(phydev, 0x12, 0x2e); __phy_write(phydev, 0x10, 0x968c); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} + +static void mt7981_phy_finetune(struct phy_device *phydev) +{ + u16 val[8] = { 0x01ce, 0x01c1, + 0x020f, 0x0202, + 0x03d0, 0x03c0, + 0x0013, 0x0005 }; + int i, k; + + /* 100M eye finetune: + * Keep middle level of TX MLT3 shapper as default. + * Only change TX MLT3 overshoot level here. + */ + for (k = 0, i = 1; i < 12; i++) { + if (i % 3 == 0) + continue; + phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); + } + + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* ResetSyncOffset = 6 */ + __phy_write(phydev, 0x11, 0x600); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x8fc0); + + /* VgaDecRate = 1 */ + __phy_write(phydev, 0x11, 0x4c2a); + __phy_write(phydev, 0x12, 0x3e); + __phy_write(phydev, 0x10, 0x8fa4); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 @@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x10, 0x8ec0); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/ + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9)); @@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222); } -static void mt7981_phy_finetune(struct phy_device *phydev) -{ - u16 val[8] = { 0x01ce, 0x01c1, - 0x020f, 0x0202, - 0x03d0, 0x03c0, - 0x0013, 0x0005 }; - int i, k; - - /* 100M eye finetune: - * Keep middle level of TX MLT3 shapper as default. - * Only change TX MLT3 overshoot level here. - */ - for (k = 0, i = 1; i < 12; i++) { - if (i % 3 == 0) - continue; - phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); - } - - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); - - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); - - /* FfeUpdGainForce = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); - - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); -} - static void mt7988_phy_finetune(struct phy_device *phydev) { u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182, @@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* TCT finetune */ phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); - /* Disable TX power saving */ - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, - MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */ - __phy_write(phydev, 0x11, 0x671); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - /* ResetSyncOffset = 5 */ __phy_write(phydev, 0x11, 0x500); __phy_write(phydev, 0x12, 0x0); @@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* VgaDecRate is 1 at default on mt7988 */ - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, + * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 + */ + __phy_write(phydev, 0x11, 0xb90a); + __phy_write(phydev, 0x12, 0x6f); + __phy_write(phydev, 0x10, 0x8f82); + + /* RemAckCntLimitCtrl = 1 */ + __phy_write(phydev, 0x11, 0xfbba); + __phy_write(phydev, 0x12, 0xc3); + __phy_write(phydev, 0x10, 0x87f8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30); - /* TxClkOffset = 2 */ - __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK, - FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, + MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, + BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa)); + + /* rg_tr_lpf_cnt_val = 1023 */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff); } static void mt798x_phy_eee(struct phy_device *phydev) @@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_LPI_SLV_SEND_TX_EN, FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120)); - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, - MTK_PHY_LPI_SEND_LOC_TIMER_MASK | - MTK_PHY_LPI_TXPCS_LOC_RCV, - FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117)); + /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, + MTK_PHY_LPI_TXPCS_LOC_RCV); + /* This also fixes some IoT issues, such as CH340 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7, MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK, FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) | @@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9690); - /* REG_EEE_st2TrKf1000 = 3 */ + /* REG_EEE_st2TrKf1000 = 2 */ __phy_write(phydev, 0x11, 0x114f); __phy_write(phydev, 0x12, 0x2); __phy_write(phydev, 0x10, 0x969a); @@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96b8); - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */ + /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ __phy_write(phydev, 0x11, 0x1463); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96ca); @@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev) if (err) return err; + /* Disable TX power saving at probing to: + * 1. Meet common mode compliance test criteria + * 2. Make sure that TX-VCM calibration works fine + */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); + return mt798x_phy_calibration(phydev); } -- GitLab From ab4443fe3ca6298663a55c4a70efc6c3ce913ca6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 4 Jan 2024 09:58:39 +0100 Subject: [PATCH 0167/1405] readahead: avoid multiple marked readahead pages ra_alloc_folio() marks a page that should trigger next round of async readahead. However it rounds up computed index to the order of page being allocated. This can however lead to multiple consecutive pages being marked with readahead flag. Consider situation with index == 1, mark == 1, order == 0. We insert order 0 page at index 1 and mark it. Then we bump order to 1, index to 2, mark (still == 1) is rounded up to 2 so page at index 2 is marked as well. Then we bump order to 2, index is incremented to 4, mark gets rounded to 4 so page at index 4 is marked as well. The fact that multiple pages get marked within a single readahead window confuses the readahead logic and results in readahead window being trimmed back to 1. This situation is triggered in particular when maximum readahead window size is not a power of two (in the observed case it was 768 KB) and as a result sequential read throughput suffers. Fix the problem by rounding 'mark' down instead of up. Because the index is naturally aligned to 'order', we are guaranteed 'rounded mark' == index iff 'mark' is within the page we are allocating at 'index' and thus exactly one page is marked with readahead flag as required by the readahead code and sequential read performance is restored. This effectively reverts part of commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios"). The commit changed the rounding with the rationale: "... we were setting the readahead flag on the folio which contains the last byte read from the block. This is wrong because we will trigger readahead at the end of the read without waiting to see if a subsequent read is going to use the pages we just read." Although this is true, the fact is this was always the case with read sizes not aligned to folio boundaries and large folios in the page cache just make the situation more obvious (and frequent). Also for sequential read workloads it is better to trigger the readahead earlier rather than later. It is true that the difference in the rounding and thus earlier triggering of the readahead can result in reading more for semi-random workloads. However workloads really suffering from this seem to be rare. In particular I have verified that the workload described in commit b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") of reading random 100k blocks from a file like: [reader] bs=100k rw=randread numjobs=1 size=64g runtime=60s is not impacted by the rounding change and achieves ~70MB/s in both cases. [jack@suse.cz: fix one more place where mark rounding was done as well] Link: https://lkml.kernel.org/r/20240123153254.5206-1-jack@suse.cz Link: https://lkml.kernel.org/r/20240104085839.21029-1-jack@suse.cz Fixes: b9ff43dd2743 ("mm/readahead: Fix readahead with large folios") Signed-off-by: Jan Kara Cc: Matthew Wilcox Cc: Guo Xuenan Cc: Signed-off-by: Andrew Morton --- mm/readahead.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/readahead.c b/mm/readahead.c index 23620c57c1225..2648ec4f04947 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; -- GitLab From 19d3e221807772f8443e565234a6fdc5a2b09d26 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 12 Jan 2024 10:08:40 -0800 Subject: [PATCH 0168/1405] fs/hugetlbfs/inode.c: mm/memory-failure.c: fix hugetlbfs hwpoison handling has_extra_refcount() makes the assumption that the page cache adds a ref count of 1 and subtracts this in the extra_pins case. Commit a08c7193e4f1 (mm/filemap: remove hugetlb special casing in filemap.c) modifies __filemap_add_folio() by calling folio_ref_add(folio, nr); for all cases (including hugtetlb) where nr is the number of pages in the folio. We should adjust the number of references coming from the page cache by subtracing the number of pages rather than 1. In hugetlbfs_read_iter(), folio_test_has_hwpoisoned() is testing the wrong flag as, in the hugetlb case, memory-failure code calls folio_test_set_hwpoison() to indicate poison. folio_test_hwpoison() is the correct function to test for that flag. After these fixes, the hugetlb hwpoison read selftest passes all cases. Link: https://lkml.kernel.org/r/20240112180840.367006-1-sidhartha.kumar@oracle.com Fixes: a08c7193e4f1 ("mm/filemap: remove hugetlb special casing in filemap.c") Signed-off-by: Sidhartha Kumar Closes: https://lore.kernel.org/linux-mm/20230713001833.3778937-1-jiaqiyan@google.com/T/#m8e1469119e5b831bbd05d495f96b842e4a1c5519 Reported-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Acked-by: Miaohe Lin Acked-by: Muchun Song Cc: James Houghton Cc: Jiaqi Yan Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: [6.7+] Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 2 +- mm/memory-failure.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ea5b8e57d904e..671664fed3077 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -340,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { folio_unlock(folio); - if (!folio_test_has_hwpoisoned(folio)) + if (!folio_test_hwpoison(folio)) want = nr; else { /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4f9b61f4a6682..636280d04008d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p, int count = page_count(p) - 1; if (extra_pins) - count -= 1; + count -= folio_nr_pages(page_folio(p)); if (count > 0) { pr_err("%#lx: %s still referenced by %d users\n", -- GitLab From f8ee4361b7be3f0c5bd21ee47561473ddf3aa17b Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Fri, 12 Jan 2024 12:18:50 +0500 Subject: [PATCH 0169/1405] selftests/mm: mremap_test: fix build warning Use 2 separate variables of types int and unsigned long long instead of confusing them. This corrects the correct print format for each of them and removes the build warning: warning: format `%d' expects argument of type `int', but argument 2 has type `long long unsigned int' Link: https://lkml.kernel.org/r/20240112071851.612930-1-usama.anjum@collabora.com Fixes: a4cb3b243343 ("selftests: mm: add a test for remapping to area immediately after existing mapping") Signed-off-by: Muhammad Usama Anjum Cc: Joel Fernandes (Google) Cc: Lorenzo Stoakes Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 27 ++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1d4c1589c3055..2f8b991f78cb4 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - unsigned long long i; + int d; + unsigned long long t; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; @@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for source block. */ srand(pattern_seed); - for (i = 0; i < threshold; i++) - memset((char *) src_addr + i, (char) rand(), 1); + for (t = 0; t < threshold; t++) + memset((char *) src_addr + t, (char) rand(), 1); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for the dest preamble block. */ srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) - memset((char *) dest_preamble_addr + i, (char) rand(), 1); + for (d = 0; d < c.dest_preamble_size; d++) + memset((char *) dest_preamble_addr + d, (char) rand(), 1); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify byte pattern after remapping */ srand(pattern_seed); - for (i = 0; i < threshold; i++) { + for (t = 0; t < threshold; t++) { char c = (char) rand(); - if (((char *) dest_addr)[i] != c) { + if (((char *) dest_addr)[t] != c) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - i); + t); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_addr)[i] & 0xff); + ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; } @@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify the dest preamble byte pattern after remapping */ if (c.dest_preamble_size) { srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) { + for (d = 0; d < c.dest_preamble_size; d++) { char c = (char) rand(); - if (((char *) dest_preamble_addr)[i] != c) { + if (((char *) dest_preamble_addr)[d] != c) { ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - i); + d); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[i] & 0xff); + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } -- GitLab From 4dca82d14174fe53f656a6bc32398db1bdd8f481 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 15 Jan 2024 11:07:31 +0100 Subject: [PATCH 0170/1405] uprobes: use pagesize-aligned virtual address when replacing pages uprobes passes an unaligned page mapping address to folio_add_new_anon_rmap(), which ends up triggering a VM_BUG_ON() we recently extended in commit 372cbd4d5a066 ("mm: non-pmd-mappable, large folios for folio_add_new_anon_rmap()"). Arguably, this is uprobes code doing something wrong; however, for the time being it would have likely worked in rmap code because __folio_set_anon() would set folio->index to the same value. Looking at __replace_page(), we'd also pass slightly wrong values to mmu_notifier_range_init(), page_vma_mapped_walk(), flush_cache_page(), ptep_clear_flush() and set_pte_at_notify(). I suspect most of them are fine, but let's just mark the introducing commit as the one needed fixing. I don't think CC stable is warranted. We'll add more sanity checks in rmap code separately, to make sure that we always get properly aligned addresses. Link: https://lkml.kernel.org/r/20240115100731.91007-1-david@redhat.com Fixes: c517ee744b96 ("uprobes: __replace_page() should not use page_address_in_vma()") Signed-off-by: David Hildenbrand Reported-by: Jiri Olsa Closes: https://lkml.kernel.org/r/ZaMR2EWN-HvlCfUl@krava Tested-by: Jiri Olsa Reviewed-by: Ryan Roberts Acked-by: Oleg Nesterov Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ian Rogers Cc: Adrian Hunter Signed-off-by: Andrew Morton --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 485bb0389b488..929e98c629652 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -537,7 +537,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, } } - ret = __replace_page(vma, vaddr, old_page, new_page); + ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: -- GitLab From c4608d1bf7c6536d1a3d233eb21e50678681564e Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 20 Dec 2023 22:59:43 -0800 Subject: [PATCH 0171/1405] mm: mmap: map MAP_STACK to VM_NOHUGEPAGE commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") incured regression for stress-ng pthread benchmark [1]. It is because THP get allocated to pthread's stack area much more possible than before. Pthread's stack area is allocated by mmap without VM_GROWSDOWN or VM_GROWSUP flag, so kernel can't tell whether it is a stack area or not. The MAP_STACK flag is used to mark the stack area, but it is a no-op on Linux. Mapping MAP_STACK to VM_NOHUGEPAGE to prevent from allocating THP for such stack area. With this change the stack area looks like: fffd18e10000-fffd19610000 rw-p 00000000 00:00 0 Size: 8192 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Rss: 12 kB Pss: 12 kB Pss_Dirty: 12 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 12 kB Referenced: 12 kB Anonymous: 12 kB KSM: 0 kB LazyFree: 0 kB AnonHugePages: 0 kB ShmemPmdMapped: 0 kB FilePmdMapped: 0 kB Shared_Hugetlb: 0 kB Private_Hugetlb: 0 kB Swap: 0 kB SwapPss: 0 kB Locked: 0 kB THPeligible: 0 VmFlags: rd wr mr mw me ac nh The "nh" flag is set. [1] https://lore.kernel.org/linux-mm/202312192310.56367035-oliver.sang@intel.com/ Link: https://lkml.kernel.org/r/20231221065943.2803551-2-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: kernel test robot Tested-by: Oliver Sang Reviewed-by: Yin Fengwei Cc: Rik van Riel Cc: Matthew Wilcox Cc: Christopher Lameter Cc: Huang, Ying Cc: Signed-off-by: Andrew Morton --- include/linux/mman.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d4920..dc7048824be81 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } -- GitLab From 63fd327016fdfca6f2fa27eba3496bd079eb8ed3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 11 Jan 2024 08:29:02 -0500 Subject: [PATCH 0172/1405] mm: memcontrol: don't throttle dying tasks on memory.high While investigating hosts with high cgroup memory pressures, Tejun found culprit zombie tasks that had were holding on to a lot of memory, had SIGKILL pending, but were stuck in memory.high reclaim. In the past, we used to always force-charge allocations from tasks that were exiting in order to accelerate them dying and freeing up their rss. This changed for memory.max in a4ebf1b6ca1e ("memcg: prohibit unconditional exceeding the limit of dying tasks"); it noted that this can cause (userspace inducable) containment failures, so it added a mandatory reclaim and OOM kill cycle before forcing charges. At the time, memory.high enforcement was handled in the userspace return path, which isn't reached by dying tasks, and so memory.high was still never enforced by dying tasks. When c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") added synchronous reclaim for memory.high, it added unconditional memory.high enforcement for dying tasks as well. The callstack shows that this path is where the zombie is stuck in. We need to accelerate dying tasks getting past memory.high, but we cannot do it quite the same way as we do for memory.max: memory.max is enforced strictly, and tasks aren't allowed to move past it without FIRST reclaiming and OOM killing if necessary. This ensures very small levels of excess. With memory.high, though, enforcement happens lazily after the charge, and OOM killing is never triggered. A lot of concurrent threads could have pushed, or could actively be pushing, the cgroup into excess. The dying task will enter reclaim on every allocation attempt, with little hope of restoring balance. To fix this, skip synchronous memory.high enforcement on dying tasks altogether again. Update memory.high path documentation while at it. [hannes@cmpxchg.org: also handle tasks are being killed during the reclaim] Link: https://lkml.kernel.org/r/20240111192807.GA424308@cmpxchg.org Link: https://lkml.kernel.org/r/20240111132902.389862-1-hannes@cmpxchg.org Fixes: c9afe31ec443 ("memcg: synchronously enforce memory.high for large overcharges") Signed-off-by: Johannes Weiner Reported-by: Tejun Heo Reviewed-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Dan Schatzberg Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- mm/memcontrol.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e4c8735e7c85c..46d8d02114cfe 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2623,8 +2623,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, } /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Reclaims memory over the high limit. Called directly from + * try_charge() (context permitting), as well as from the userland + * return path where reclaim is always able to block. */ void mem_cgroup_handle_over_high(gfp_t gfp_mask) { @@ -2643,6 +2644,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) current->memcg_nr_pages_over_high = 0; retry_reclaim: + /* + * Bail if the task is already exiting. Unlike memory.max, + * memory.high enforcement isn't as strict, and there is no + * OOM killer involved, which means the excess could already + * be much bigger (and still growing) than it could for + * memory.max; the dying task could get stuck in fruitless + * reclaim for a long time, which isn't desirable. + */ + if (task_is_dying()) + goto out; + /* * The allocating task should reclaim at least the batch size, but for * subsequent retries we only want to do what's necessary to prevent oom @@ -2693,6 +2705,9 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) } /* + * Reclaim didn't manage to push usage below the limit, slow + * this allocating task down. + * * If we exit early, we're guaranteed to die (since * schedule_timeout_killable sets TASK_KILLABLE). This means we don't * need to account for any ill-begotten jiffies to pay them off later. @@ -2887,11 +2902,17 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, } } while ((memcg = parent_mem_cgroup(memcg))); + /* + * Reclaim is set up above to be called from the userland + * return path. But also attempt synchronous reclaim to avoid + * excessive overrun while the task is still inside the + * kernel. If this is successful, the return path will see it + * when it rechecks the overage and simply bail out. + */ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && - gfpflags_allow_blocking(gfp_mask)) { + gfpflags_allow_blocking(gfp_mask)) mem_cgroup_handle_over_high(gfp_mask); - } return 0; } -- GitLab From 0d7e68c0271853c30655b05934d66cec81dc6afc Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Wed, 17 Jan 2024 13:22:57 +0100 Subject: [PATCH 0173/1405] MAINTAINERS: add man-pages git trees The maintainer uses both. Link: https://lkml.kernel.org/r/20240117122257.2707637-1-pvorel@suse.cz Signed-off-by: Petr Vorel Reviewed-by: Alejandro Colomar Signed-off-by: Andrew Morton --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..8c756737491ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12902,6 +12902,8 @@ M: Alejandro Colomar L: linux-man@vger.kernel.org S: Maintained W: http://www.kernel.org/doc/man-pages +T: git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git +T: git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) M: Jeremy Kerr -- GitLab From bc29036e1da1cf66e5f8312649aeec2d51ea3d86 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 0174/1405] selftests/mm: switch to bash from sh Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/mm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 0899019a7fcb4..e14bdd4455f2d 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4c27..3d2d2eb9d6fff 100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e -- GitLab From 9319b647902cbd5cc884ac08a8a6d54ce111fc78 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 0175/1405] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bytes, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_drity_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT-backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32) Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40a..02147b61712bc 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need -- GitLab From f6564fce256a3944aa1bc76cb3c40e792d97c1eb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 11:59:14 +0100 Subject: [PATCH 0176/1405] mm, kmsan: fix infinite recursion due to RCU critical section Alexander Potapenko writes in [1]: "For every memory access in the code instrumented by KMSAN we call kmsan_get_metadata() to obtain the metadata for the memory being accessed. For virtual memory the metadata pointers are stored in the corresponding `struct page`, therefore we need to call virt_to_page() to get them. According to the comment in arch/x86/include/asm/page.h, virt_to_page(kaddr) returns a valid pointer iff virt_addr_valid(kaddr) is true, so KMSAN needs to call virt_addr_valid() as well. To avoid recursion, kmsan_get_metadata() must not call instrumented code, therefore ./arch/x86/include/asm/kmsan.h forks parts of arch/x86/mm/physaddr.c to check whether a virtual address is valid or not. But the introduction of rcu_read_lock() to pfn_valid() added instrumented RCU API calls to virt_to_page_or_null(), which is called by kmsan_get_metadata(), so there is an infinite recursion now. I do not think it is correct to stop that recursion by doing kmsan_enter_runtime()/kmsan_exit_runtime() in kmsan_get_metadata(): that would prevent instrumented functions called from within the runtime from tracking the shadow values, which might introduce false positives." Fix the issue by switching pfn_valid() to the _sched() variant of rcu_read_lock/unlock(), which does not require calling into RCU. Given the critical section in pfn_valid() is very small, this is a reasonable trade-off (with preemptible RCU). KMSAN further needs to be careful to suppress calls into the scheduler, which would be another source of recursion. This can be done by wrapping the call to pfn_valid() into preempt_disable/enable_no_resched(). The downside is that this sacrifices breaking scheduling guarantees; however, a kernel compiled with KMSAN has already given up any performance guarantees due to being heavily instrumented. Note, KMSAN code already disables tracing via Makefile, and since mmzone.h is included, it is not necessary to use the notrace variant, which is generally preferred in all other cases. Link: https://lkml.kernel.org/r/20240115184430.2710652-1-glider@google.com [1] Link: https://lkml.kernel.org/r/20240118110022.2538350-1-elver@google.com Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") Signed-off-by: Marco Elver Reported-by: Alexander Potapenko Reported-by: syzbot+93a9e8a3dea8d6085e12@syzkaller.appspotmail.com Reviewed-by: Alexander Potapenko Tested-by: Alexander Potapenko Cc: Charan Teja Kalla Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/include/asm/kmsan.h | 17 ++++++++++++++++- include/linux/mmzone.h | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2d766..d91b37f5b4bb4 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4ed33b1278215..a497f189d9881 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } -- GitLab From c2a292545cd43dcab86d5290ed95b006f1cefe90 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:29 +0100 Subject: [PATCH 0177/1405] stackdepot: add stats counters exported via debugfs Add a few basic stats counters for stack depot that can be used to derive if stack depot is working as intended. This is a snapshot of the new stats after booting a system with a KASAN-enabled kernel: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29861 frees: 6561 in_use: 23300 freelist_size: 1840 Generally, "pools" should be well below the max; once the system is booted, "in_use" should remain relatively steady. Link: https://lkml.kernel.org/r/20240118110216.2539519-1-elver@google.com Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Andi Kleen Cc: Dmitry Vyukov Cc: Vlastimil Babka Cc: Alexander Potapenko Signed-off-by: Andrew Morton --- lib/stackdepot.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a0be5d05c7f08..80a8ca24ccc88 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "stackdepot: " fmt +#include #include #include #include @@ -115,6 +116,23 @@ static bool new_pool_required = true; /* Lock that protects the variables above. */ static DEFINE_RWLOCK(pool_rwlock); +/* Statistics counters for debugfs. */ +enum depot_counter_id { + DEPOT_COUNTER_ALLOCS, + DEPOT_COUNTER_FREES, + DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_COUNT, +}; +static long counters[DEPOT_COUNTER_COUNT]; +static const char *const counter_names[] = { + [DEPOT_COUNTER_ALLOCS] = "allocations", + [DEPOT_COUNTER_FREES] = "frees", + [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", +}; +static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); + static int __init disable_stack_depot(char *str) { return kstrtobool(str, &stack_depot_disabled); @@ -277,6 +295,7 @@ static void depot_init_pool(void *pool) stack->handle.extra = 0; list_add(&stack->list, &free_stacks); + counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ @@ -376,6 +395,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* Get and unlink the first entry from the freelist. */ stack = list_first_entry(&free_stacks, struct stack_record, list); list_del(&stack->list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -394,6 +414,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) */ kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + counters[DEPOT_COUNTER_ALLOCS]++; + counters[DEPOT_COUNTER_INUSE]++; return stack; } @@ -426,6 +448,10 @@ static void depot_free_stack(struct stack_record *stack) lockdep_assert_held_write(&pool_rwlock); list_add(&stack->list, &free_stacks); + + counters[DEPOT_COUNTER_FREELIST_SIZE]++; + counters[DEPOT_COUNTER_FREES]++; + counters[DEPOT_COUNTER_INUSE]--; } /* Calculates the hash for a stack. */ @@ -690,3 +716,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) return parts.extra; } EXPORT_SYMBOL(stack_depot_get_extra_bits); + +static int stats_show(struct seq_file *seq, void *v) +{ + /* + * data race ok: These are just statistics counters, and approximate + * statistics are ok for debugging. + */ + seq_printf(seq, "pools: %d\n", data_race(pools_num)); + for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) + seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(stats); + +static int depot_debugfs_init(void) +{ + struct dentry *dir; + + if (stack_depot_disabled) + return 0; + + dir = debugfs_create_dir("stackdepot", NULL); + debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); + return 0; +} +late_initcall(depot_debugfs_init); -- GitLab From 4434a56ec20925333d6cf4d4093641d063abd35b Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 18 Jan 2024 12:01:30 +0100 Subject: [PATCH 0178/1405] stackdepot: make fast paths lock-less again With the introduction of the pool_rwlock (reader-writer lock), several fast paths end up taking the pool_rwlock as readers. Furthermore, stack_depot_put() unconditionally takes the pool_rwlock as a writer. Despite allowing readers to make forward-progress concurrently, reader-writer locks have inherent cache contention issues, which does not scale well on systems with large CPU counts. Rework the synchronization story of stack depot to again avoid taking any locks in the fast paths. This is done by relying on RCU-protected list traversal, and the NMI-safe subset of RCU to delay reuse of freed stack records. See code comments for more details. Along with the performance issues, this also fixes incorrect nesting of rwlock within a raw_spinlock, given that stack depot should still be usable from anywhere: | [ BUG: Invalid wait context ] | ----------------------------- | swapper/0/1 is trying to lock: | ffffffff89869be8 (pool_rwlock){..--}-{3:3}, at: stack_depot_save_flags | other info that might help us debug this: | context-{5:5} | 2 locks held by swapper/0/1: | #0: ffffffff89632440 (rcu_read_lock){....}-{1:3}, at: __queue_work | #1: ffff888100092018 (&pool->lock){-.-.}-{2:2}, at: __queue_work <-- raw_spin_lock Stack depot usage stats are similar to the previous version after a KASAN kernel boot: $ cat /sys/kernel/debug/stackdepot/stats pools: 838 allocations: 29865 frees: 6604 in_use: 23261 freelist_size: 1879 The number of pools is the same as previously. The freelist size is minimally larger, but this may also be due to variance across system boots. This shows that even though we do not eagerly wait for the next RCU grace period (such as with synchronize_rcu() or call_rcu()) after freeing a stack record - requiring depot_pop_free() to "poll" if an entry may be used - new allocations are very likely to happen in later RCU grace periods. Link: https://lkml.kernel.org/r/20240118110216.2539519-2-elver@google.com Fixes: 108be8def46e ("lib/stackdepot: allow users to evict stack traces") Reported-by: Andi Kleen Signed-off-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 322 +++++++++++++++++++++++++++++++---------------- 1 file changed, 211 insertions(+), 111 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 80a8ca24ccc88..5caa1f5665538 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -22,8 +22,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -68,12 +69,28 @@ union handle_parts { }; struct stack_record { - struct list_head list; /* Links in hash table or freelist */ + struct list_head hash_list; /* Links in the hash table */ u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ - union handle_parts handle; + union handle_parts handle; /* Constant after initialization */ refcount_t count; - unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; }; #define DEPOT_STACK_RECORD_SIZE \ @@ -113,8 +130,8 @@ static LIST_HEAD(free_stacks); * yet allocated or if the limit on the number of pools is reached. */ static bool new_pool_required = true; -/* Lock that protects the variables above. */ -static DEFINE_RWLOCK(pool_rwlock); +/* The lock must be held when performing pool or freelist modifications. */ +static DEFINE_RAW_SPINLOCK(pool_lock); /* Statistics counters for debugfs. */ enum depot_counter_id { @@ -276,14 +293,15 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Initializes a stack depol pool. */ +/* + * Initializes new stack depot @pool, release all its entries to the freelist, + * and update the list of pools. + */ static void depot_init_pool(void *pool) { int offset; - lockdep_assert_held_write(&pool_rwlock); - - WARN_ON(!list_empty(&free_stacks)); + lockdep_assert_held(&pool_lock); /* Initialize handles and link stack records into the freelist. */ for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; @@ -294,19 +312,36 @@ static void depot_init_pool(void *pool) stack->handle.offset = offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; - list_add(&stack->list, &free_stacks); + /* + * Stack traces of size 0 are never saved, and we can simply use + * the size field as an indicator if this is a new unused stack + * record in the freelist. + */ + stack->size = 0; + + INIT_LIST_HEAD(&stack->hash_list); + /* + * Add to the freelist front to prioritize never-used entries: + * required in case there are entries in the freelist, but their + * RCU cookie still belongs to the current RCU grace period + * (there can still be concurrent readers). + */ + list_add(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; - pools_num++; + + /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ + WRITE_ONCE(pools_num, pools_num + 1); + ASSERT_EXCLUSIVE_WRITER(pools_num); } /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); /* * If a new pool is already saved or the maximum number of @@ -329,17 +364,16 @@ static void depot_keep_new_pool(void **prealloc) * number of pools is reached. In either case, take note that * keeping another pool is not required. */ - new_pool_required = false; + WRITE_ONCE(new_pool_required, false); } -/* Updates references to the current and the next stack depot pools. */ -static bool depot_update_pools(void **prealloc) +/* + * Try to initialize a new stack depot pool from either a previous or the + * current pre-allocation, and release all its entries to the freelist. + */ +static bool depot_try_init_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); - - /* Check if we still have objects in the freelist. */ - if (!list_empty(&free_stacks)) - goto out_keep_prealloc; + lockdep_assert_held(&pool_lock); /* Check if we have a new pool saved and use it. */ if (new_pool) { @@ -348,10 +382,9 @@ static bool depot_update_pools(void **prealloc) /* Take note that we might need a new new_pool. */ if (pools_num < DEPOT_MAX_POOLS) - new_pool_required = true; + WRITE_ONCE(new_pool_required, true); - /* Try keeping the preallocated memory for new_pool. */ - goto out_keep_prealloc; + return true; } /* Bail out if we reached the pool limit. */ @@ -368,12 +401,32 @@ static bool depot_update_pools(void **prealloc) } return false; +} + +/* Try to find next free usable entry. */ +static struct stack_record *depot_pop_free(void) +{ + struct stack_record *stack; -out_keep_prealloc: - /* Keep the preallocated memory for a new pool if required. */ - if (*prealloc) - depot_keep_new_pool(prealloc); - return true; + lockdep_assert_held(&pool_lock); + + if (list_empty(&free_stacks)) + return NULL; + + /* + * We maintain the invariant that the elements in front are least + * recently used, and are therefore more likely to be associated with an + * RCU grace period in the past. Consequently it is sufficient to only + * check the first entry. + */ + stack = list_first_entry(&free_stacks, struct stack_record, free_list); + if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + return NULL; + + list_del(&stack->free_list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; + + return stack; } /* Allocates a new stack in a stack depot pool. */ @@ -382,20 +435,22 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); - /* Update current and new pools if required and possible. */ - if (!depot_update_pools(prealloc)) + /* This should already be checked by public API entry points. */ + if (WARN_ON_ONCE(!size)) return NULL; /* Check if we have a stack record to save the stack trace. */ - if (list_empty(&free_stacks)) - return NULL; - - /* Get and unlink the first entry from the freelist. */ - stack = list_first_entry(&free_stacks, struct stack_record, list); - list_del(&stack->list); - counters[DEPOT_COUNTER_FREELIST_SIZE]--; + stack = depot_pop_free(); + if (!stack) { + /* No usable entries on the freelist - try to refill the freelist. */ + if (!depot_try_init_pool(prealloc)) + return NULL; + stack = depot_pop_free(); + if (WARN_ON(!stack)) + return NULL; + } /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -421,37 +476,73 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { + const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - lockdep_assert_held(&pool_rwlock); + lockdep_assert_not_held(&pool_lock); - if (parts.pool_index > pools_num) { + if (parts.pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num, handle); + parts.pool_index, pools_num_cached, handle); return NULL; } pool = stack_pools[parts.pool_index]; - if (!pool) + if (WARN_ON(!pool)) return NULL; stack = pool + offset; + if (WARN_ON(!refcount_read(&stack->count))) + return NULL; + return stack; } /* Links stack into the freelist. */ static void depot_free_stack(struct stack_record *stack) { - lockdep_assert_held_write(&pool_rwlock); + unsigned long flags; + + lockdep_assert_not_held(&pool_lock); + + raw_spin_lock_irqsave(&pool_lock, flags); + printk_deferred_enter(); - list_add(&stack->list, &free_stacks); + /* + * Remove the entry from the hash list. Concurrent list traversal may + * still observe the entry, but since the refcount is zero, this entry + * will no longer be considered as valid. + */ + list_del_rcu(&stack->hash_list); + + /* + * Due to being used from constrained contexts such as the allocators, + * NMI, or even RCU itself, stack depot cannot rely on primitives that + * would sleep (such as synchronize_rcu()) or recursively call into + * stack depot again (such as call_rcu()). + * + * Instead, get an RCU cookie, so that we can ensure this entry isn't + * moved onto another list until the next grace period, and concurrent + * RCU list traversal remains safe. + */ + stack->rcu_state = get_state_synchronize_rcu(); + + /* + * Add the entry to the freelist tail, so that older entries are + * considered first - their RCU cookie is more likely to no longer be + * associated with the current grace period. + */ + list_add_tail(&stack->free_list, &free_stacks); counters[DEPOT_COUNTER_FREELIST_SIZE]++; counters[DEPOT_COUNTER_FREES]++; counters[DEPOT_COUNTER_INUSE]--; + + printk_deferred_exit(); + raw_spin_unlock_irqrestore(&pool_lock, flags); } /* Calculates the hash for a stack. */ @@ -479,22 +570,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, /* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct list_head *bucket, - unsigned long *entries, int size, - u32 hash) + unsigned long *entries, int size, + u32 hash, depot_flags_t flags) { - struct list_head *pos; - struct stack_record *found; + struct stack_record *stack, *ret = NULL; + + /* + * Stack depot may be used from instrumentation that instruments RCU or + * tracing itself; use variant that does not call into RCU and cannot be + * traced. + * + * Note: Such use cases must take care when using refcounting to evict + * unused entries, because the stack record free-then-reuse code paths + * do call into RCU. + */ + rcu_read_lock_sched_notrace(); + + list_for_each_entry_rcu(stack, bucket, hash_list) { + if (stack->hash != hash || stack->size != size) + continue; - lockdep_assert_held(&pool_rwlock); + /* + * This may race with depot_free_stack() accessing the freelist + * management state unioned with @entries. The refcount is zero + * in that case and the below refcount_inc_not_zero() will fail. + */ + if (data_race(stackdepot_memcmp(entries, stack->entries, size))) + continue; - list_for_each(pos, bucket) { - found = list_entry(pos, struct stack_record, list); - if (found->hash == hash && - found->size == size && - !stackdepot_memcmp(entries, found->entries, size)) - return found; + /* + * Try to increment refcount. If this succeeds, the stack record + * is valid and has not yet been freed. + * + * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior + * to then call stack_depot_put() later, and we can assume that + * a stack record is never placed back on the freelist. + */ + if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) + continue; + + ret = stack; + break; } - return NULL; + + rcu_read_unlock_sched_notrace(); + + return ret; } depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, @@ -508,7 +629,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, struct page *page = NULL; void *prealloc = NULL; bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; - bool need_alloc = false; unsigned long flags; u32 hash; @@ -531,31 +651,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - - /* Fast path: look the stack trace up without full locking. */ - found = find_stack(bucket, entries, nr_entries, hash); - if (found) { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* Fast path: look the stack trace up without locking. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); + if (found) goto exit; - } - - /* Take note if another stack pool needs to be allocated. */ - if (new_pool_required) - need_alloc = true; - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); /* * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && need_alloc)) { + if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -569,31 +674,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - write_lock_irqsave(&pool_rwlock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); printk_deferred_enter(); - found = find_stack(bucket, entries, nr_entries, hash); + /* Try to find again, to avoid concurrently inserting duplicates. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { - list_add(&new->list, bucket); + /* + * This releases the stack record into the bucket and + * makes it visible to readers in find_stack(). + */ + list_add_rcu(&new->hash_list, bucket); found = new; } - } else { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); + } + + if (prealloc) { /* - * Stack depot already contains this stack trace, but let's - * keep the preallocated memory for future. + * Either stack depot already contains this stack trace, or + * depot_alloc_stack() did not consume the preallocated memory. + * Try to keep the preallocated memory for future. */ - if (prealloc) - depot_keep_new_pool(&prealloc); + depot_keep_new_pool(&prealloc); } printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ @@ -618,7 +728,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { struct stack_record *stack; - unsigned long flags; *entries = NULL; /* @@ -630,13 +739,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* + * Should never be NULL, otherwise this is a use-after-put (or just a + * corrupt handle). + */ + if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) + return 0; *entries = stack->entries; return stack->size; @@ -646,29 +755,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); void stack_depot_put(depot_stack_handle_t handle) { struct stack_record *stack; - unsigned long flags; if (!handle || stack_depot_disabled) return; - write_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - if (WARN_ON(!stack)) - goto out; - - if (refcount_dec_and_test(&stack->count)) { - /* Unlink stack from the hash table. */ - list_del(&stack->list); + /* + * Should always be able to find the stack record, otherwise this is an + * unbalanced put attempt (or corrupt handle). + */ + if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) + return; - /* Free stack. */ + if (refcount_dec_and_test(&stack->count)) depot_free_stack(stack); - } - -out: - printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); } EXPORT_SYMBOL_GPL(stack_depot_put); -- GitLab From 0fe8ff51efb3fe5cb25da13229f95aa709613cb3 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 17 Jan 2024 18:21:52 +0000 Subject: [PATCH 0179/1405] MAINTAINERS: supplement of zswap maintainers update As discussed on the mailing list [1], merge the zpool maintainers entry into the zswap one. Also, add CREDITS entries for previous zswap/zpool maintainers. [1] https://lore.kernel.org/linux-mm/CAJD7tkYx4YWhGoVwnSeGc8dY_1aRRxxg8PzWBV==A6iqG_OgFw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240117182152.1439822-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Nhat Pham Acked-by: Dan Streetman Acked-by: Seth Jennings Acked-by: Johannes Weiner Cc: Vitaly Wool Signed-off-by: Andrew Morton --- CREDITS | 13 +++++++++++++ MAINTAINERS | 9 ++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/CREDITS b/CREDITS index 5797e8f7e92b0..df8d6946739f6 100644 --- a/CREDITS +++ b/CREDITS @@ -2161,6 +2161,19 @@ N: Mike Kravetz E: mike.kravetz@oracle.com D: Maintenance and development of the hugetlb subsystem +N: Seth Jennings +E: sjenning@redhat.com +D: Creation and maintenance of zswap + +N: Dan Streetman +E: ddstreet@ieee.org +D: Maintenance and development of zswap +D: Creation and maintenance of the zpool API + +N: Vitaly Wool +E: vitaly.wool@konsulko.com +D: Maintenance and development of zswap + N: Andreas S. Krebs E: akrebs@altavista.net D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards diff --git a/MAINTAINERS b/MAINTAINERS index 8c756737491ec..fecebfc4c0dc6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24341,13 +24341,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git F: Documentation/filesystems/zonefs.rst F: fs/zonefs/ -ZPOOL COMPRESSED PAGE STORAGE API -M: Dan Streetman -L: linux-mm@kvack.org -S: Maintained -F: include/linux/zpool.h -F: mm/zpool.c - ZR36067 VIDEO FOR LINUX DRIVER M: Corentin Labbe L: mjpeg-users@lists.sourceforge.net @@ -24399,7 +24392,9 @@ M: Nhat Pham L: linux-mm@kvack.org S: Maintained F: Documentation/admin-guide/mm/zswap.rst +F: include/linux/zpool.h F: include/linux/zswap.h +F: mm/zpool.c F: mm/zswap.c THE REST -- GitLab From 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 0180/1405] selftests: mm: fix map_hugetlb failure on 64K page size systems On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 193281560b61b..86e8f2048a409 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -58,10 +59,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { -- GitLab From 52e63d67b5bb423b33d7a262ac7f8bd375a90145 Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Fri, 19 Jan 2024 15:58:01 -0500 Subject: [PATCH 0181/1405] selftests/mm: Update va_high_addr_switch.sh to check CPU for la57 flag In order for the page table level 5 to be in use, the CPU must have the setting enabled in addition to the CONFIG option. Check for the flag to be set to avoid false test failures on systems that do not have this cpu flag set. The test does a series of mmap calls including three using the MAP_FIXED flag and specifying an address that is 1<<47 or 1<<48. These addresses are only available if you are using level 5 page tables, which requires both the CPU to have the capabiltiy (la57 flag) and the kernel to be configured. Currently the test only checks for the kernel configuration option, so this test can still report a false positive. Here are the three failing lines: $ ./va_high_addr_switch | grep FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(HIGH_ADDR, MAP_FIXED): 0xffffffffffffffff - FAILED mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED): 0xffffffffffffffff - FAILED I thought (for about a second) refactoring the test so that these three mmap calls will only be run on systems with the level 5 page tables available, but the whole point of the test is to check the level 5 feature... Link: https://lkml.kernel.org/r/20240119205801.62769-1-audra@redhat.com Fixes: 4f2930c6718a ("selftests/vm: only run 128TBswitch with 5-level paging") Signed-off-by: Audra Mitchell Cc: Rafael Aquini Cc: Shuah Khan Cc: Adam Sindelar Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/va_high_addr_switch.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab27e1..a0a75f3029043 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } -- GitLab From db44c658f798ad907219f15e033229b8d1aadb93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:54:07 +0100 Subject: [PATCH 0182/1405] mm/huge_memory: fix folio_set_dirty() vs. folio_mark_dirty() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble. Link: https://lkml.kernel.org/r/20240122175407.307992-1-david@redhat.com Fixes: a8e61d584eda0 ("mm/huge_memory: page_remove_rmap() -> folio_remove_rmap_pmd()") Signed-off-by: David Hildenbrand Cc: Ryan Roberts Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 94ef5c02b4596..5f31e51f22356 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2437,7 +2437,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(old_pmd); folio = page_folio(page); if (!folio_test_dirty(folio) && pmd_dirty(old_pmd)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (!folio_test_referenced(folio) && pmd_young(old_pmd)) folio_set_referenced(folio); folio_remove_rmap_pmd(folio, page, vma); @@ -3563,7 +3563,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, } if (pmd_dirty(pmdval)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (pmd_write(pmdval)) entry = make_writable_migration_entry(page_to_pfn(page)); else if (anon_exclusive) -- GitLab From e4e3df290f65da6cb27dac1309389c916f27db1a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jan 2024 18:17:51 +0100 Subject: [PATCH 0183/1405] mm/memory: fix folio_set_dirty() vs. folio_mark_dirty() in zap_pte_range() The correct folio replacement for "set_page_dirty()" is "folio_mark_dirty()", not "folio_set_dirty()". Using the latter won't properly inform the FS using the dirty_folio() callback. This has been found by code inspection, but likely this can result in some real trouble when zapping dirty PTEs that point at clean pagecache folios. Yuezhang Mo said: "Without this fix, testing the latest exfat with xfstests, test cases generic/029 and generic/030 will fail." Link: https://lkml.kernel.org/r/20240122171751.272074-1-david@redhat.com Fixes: c46265030b0f ("mm/memory: page_remove_rmap() -> folio_remove_rmap_pte()") Signed-off-by: David Hildenbrand Reported-by: Ryan Roberts Closes: https://lkml.kernel.org/r/2445cedb-61fb-422c-8bfb-caf0a2beed62@arm.com Reviewed-by: Ryan Roberts Cc: Matthew Wilcox (Oracle) Reviewed-by: Yuezhang Mo Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 7e1f4849463aa..89bcae0b224d6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, delay_rmap = 0; if (!folio_test_anon(folio)) { if (pte_dirty(ptent)) { - folio_set_dirty(folio); + folio_mark_dirty(folio); if (tlb_delay_rmap(tlb)) { delay_rmap = 1; force_flush = 1; -- GitLab From 6f9dc684cae638dda0570154509884ee78d0f75c Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Mon, 22 Jan 2024 09:52:01 -0800 Subject: [PATCH 0184/1405] scs: add CONFIG_MMU dependency for vfree_atomic() The shadow call stack implementation fails to build without CONFIG_MMU: ld.lld: error: undefined symbol: vfree_atomic >>> referenced by scs.c >>> kernel/scs.o:(scs_free) in archive vmlinux.a Link: https://lkml.kernel.org/r/20240122175204.2371009-1-samuel.holland@sifive.com Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index c91917b508736..a5af0edd3eb8f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -673,6 +673,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from -- GitLab From d021b442cf312664811783e92b3d5e4548e92a53 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 22 Jan 2024 12:05:54 +0000 Subject: [PATCH 0185/1405] selftests/mm: ksm_tests should only MADV_HUGEPAGE valid memory ksm_tests was previously mmapping a region of memory, aligning the returned pointer to a PMD boundary, then setting MADV_HUGEPAGE, but was setting it past the end of the mmapped area due to not taking the pointer alignment into consideration. Fix this behaviour. Up until commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries"), this buggy behavior was (usually) masked because the alignment difference was always less than PMD-size. But since the mentioned commit, `ksm_tests -H -s 100` started failing. Link: https://lkml.kernel.org/r/20240122120554.3108022-1-ryan.roberts@arm.com Fixes: 325254899684 ("selftests: vm: add KSM huge pages merging time test") Signed-off-by: Ryan Roberts Cc: Pedro Demarchi Gomes Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/ksm_tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 380b691d3eb9f..b748c48908d9d 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); -- GitLab From 67695f18d55924b2013534ef3bdc363bc9e14605 Mon Sep 17 00:00:00 2001 From: Lokesh Gidra Date: Wed, 17 Jan 2024 14:37:29 -0800 Subject: [PATCH 0186/1405] userfaultfd: fix mmap_changing checking in mfill_atomic_hugetlb In mfill_atomic_hugetlb(), mmap_changing isn't being checked again if we drop mmap_lock and reacquire it. When the lock is not held, mmap_changing could have been incremented. This is also inconsistent with the behavior in mfill_atomic(). Link: https://lkml.kernel.org/r/20240117223729.1444522-1-lokeshgidra@google.com Fixes: df2cc96e77011 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races") Signed-off-by: Lokesh Gidra Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Axel Rasmussen Cc: Brian Geffon Cc: David Hildenbrand Cc: Jann Horn Cc: Kalesh Singh Cc: Matthew Wilcox (Oracle) Cc: Nicolas Geoffray Cc: Peter Xu Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 20e3b0d9cf7ed..75fcf1f783bc5 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb( unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags) { struct mm_struct *dst_mm = dst_vma->vm_mm; @@ -472,6 +473,15 @@ static __always_inline ssize_t mfill_atomic_hugetlb( goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags); #endif /* CONFIG_HUGETLB_PAGE */ @@ -622,8 +633,8 @@ static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm, * If this is a HUGETLB vma, pass off to appropriate routine */ if (is_vm_hugetlb_page(dst_vma)) - return mfill_atomic_hugetlb(dst_vma, dst_start, - src_start, len, flags); + return mfill_atomic_hugetlb(dst_vma, dst_start, src_start, + len, mmap_changing, flags); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; -- GitLab From 4ef9ad19e17676b9ef071309bc62020e2373705d Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 18 Jan 2024 10:05:05 -0800 Subject: [PATCH 0187/1405] mm: huge_memory: don't force huge page alignment on 32 bit commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused two issues [1] [2] reported on 32 bit system or compat userspace. It doesn't make too much sense to force huge page alignment on 32 bit system due to the constrained virtual address space. [1] https://lore.kernel.org/linux-mm/d0a136a0-4a31-46bc-adf4-2db109a61672@kernel.org/ [2] https://lore.kernel.org/linux-mm/CAJuCfpHXLdQy1a2B6xN2d7quTYwg2OoZseYPZTRpU0eHHKD-sQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240118180505.2914778-1-shy828301@gmail.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Signed-off-by: Yang Shi Reported-by: Jiri Slaby Reported-by: Suren Baghdasaryan Tested-by: Jiri Slaby Tested-by: Suren Baghdasaryan Reviewed-by: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Christopher Lameter Signed-off-by: Andrew Morton --- mm/huge_memory.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5f31e51f22356..172b85208276e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -811,6 +812,9 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, loff_t off_align = round_up(off, size); unsigned long len_pad, ret; + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; + if (off_end <= off_align || (off_end - off_align) < size) return 0; -- GitLab From 9b3058d1f456464a02e202cc7fc660508709097c Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Tue, 2 Jan 2024 14:20:19 +0200 Subject: [PATCH 0188/1405] MAINTAINERS: remove myself as iwlwifi driver maintainer As I'm resigning from Intel, it's time to remove myself as a maintainer of iwlwifi. Good luck to Miri! Signed-off-by: Gregory Greenman Link: https://msgid.link/20240102122019.1689602-1-gregory.greenman@intel.com Signed-off-by: Johannes Berg --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 92152ac346c8d..d8ca0e10c8d10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11128,7 +11128,6 @@ S: Supported F: drivers/net/wireless/intel/iwlegacy/ INTEL WIRELESS WIFI LINK (iwlwifi) -M: Gregory Greenman M: Miri Korenblit L: linux-wireless@vger.kernel.org S: Supported -- GitLab From e1d54d153fc3e697b841999df7cbad51492def8e Mon Sep 17 00:00:00 2001 From: Damian Muszynski Date: Fri, 19 Jan 2024 17:12:38 +0100 Subject: [PATCH 0189/1405] crypto: qat - fix arbiter mapping generation algorithm for QAT 402xx The commit "crypto: qat - generate dynamically arbiter mappings" introduced a regression on qat_402xx devices. This is reported when the driver probes the device, as indicated by the following error messages: 4xxx 0000:0b:00.0: enabling device (0140 -> 0142) 4xxx 0000:0b:00.0: Generate of the thread to arbiter map failed 4xxx 0000:0b:00.0: Direct firmware load for qat_402xx_mmp.bin failed with error -2 The root cause of this issue was the omission of a necessary function pointer required by the mapping algorithm during the implementation. Fix it by adding the missing function pointer. Fixes: 5da6a2d5353e ("crypto: qat - generate dynamically arbiter mappings") Signed-off-by: Damian Muszynski Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 479062aa5e6b6..94a0ebb03d8c9 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; break; case ADF_401XX_PCI_DEVICE_ID: hw_data->fw_name = ADF_4XXX_FW; -- GitLab From c5a2f74db71a849f3a60bc153d684d6d28a0c665 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Thu, 18 Jan 2024 14:55:57 +0530 Subject: [PATCH 0190/1405] crypto: caam - fix asynchronous hash ahash_alg->setkey is updated to ahash_nosetkey in ahash.c so checking setkey() function to determine hmac algorithm is not valid. to fix this added is_hmac variable in structure caam_hash_alg to determine whether the algorithm is hmac or not. Fixes: 2f1f34c1bf7b ("crypto: ahash - optimize performance when wrapping shash") Signed-off-by: Gaurav Jain Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg_qi2.c | 7 +++++-- drivers/crypto/caam/caamhash.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a148ff1f0872c..a4f6884416a04 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -4545,6 +4545,7 @@ struct caam_hash_alg { struct list_head entry; struct device *dev; int alg_type; + bool is_hmac; struct ahash_alg ahash_alg; }; @@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->dev = caam_hash->dev; - if (alg->setkey) { + if (caam_hash->is_hmac) { ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key, ARRAY_SIZE(ctx->key), DMA_TO_DEVICE, @@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); t_alg->ahash_alg.setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 290c8500c247f..fdd724228c2fa 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = { struct caam_hash_alg { struct list_head entry; int alg_type; + bool is_hmac; struct ahash_engine_alg ahash_alg; }; @@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } else { if (priv->era >= 6) { ctx->dir = DMA_BIDIRECTIONAL; - ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE; + ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE; } else { ctx->dir = DMA_TO_DEVICE; ctx->key_dir = DMA_NONE; @@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); halg->setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; -- GitLab From 96204e15310c218fd9355bdcacd02fed1d18070e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 23 Jan 2024 17:14:20 +0000 Subject: [PATCH 0191/1405] mm: thp_get_unmapped_area must honour topdown preference The addition of commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") caused the "virtual_address_range" mm selftest to start failing on arm64. Let's fix that regression. There were 2 visible problems when running the test; 1) it takes much longer to execute, and 2) the test fails. Both are related: The (first part of the) test allocates as many 1GB anonymous blocks as it can in the low 256TB of address space, passing NULL as the addr hint to mmap. Before the faulty patch, all allocations were abutted and contained in a single, merged VMA. However, after this patch, each allocation is in its own VMA, and there is a 2M gap between each VMA. This causes the 2 problems in the test: 1) mmap becomes MUCH slower because there are so many VMAs to check to find a new 1G gap. 2) mmap fails once it hits the VMA limit (/proc/sys/vm/max_map_count). Hitting this limit then causes a subsequent calloc() to fail, which causes the test to fail. The problem is that arm64 (unlike x86) selects ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. But __thp_get_unmapped_area() allocates len+2M then always aligns to the bottom of the discovered gap. That causes the 2M hole. Fix this by detecting cases where we can still achive the alignment goal when moved to the top of the allocated area, if configured to prefer top-down allocation. While we are at it, fix thp_get_unmapped_area's use of pgoff, which should always be zero for anonymous mappings. Prior to the faulty change, while it was possible for user space to pass in pgoff!=0, the old mm->get_unmapped_area() handler would not use it. thp_get_unmapped_area() does use it, so let's explicitly zero it before calling the handler. This should also be the correct behavior for arches that define their own get_unmapped_area() handler. Link: https://lkml.kernel.org/r/20240123171420.3970220-1-ryan.roberts@arm.com Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") Closes: https://lore.kernel.org/linux-mm/1e8f5ac7-54ce-433a-ae53-81522b2320e1@arm.com/ Signed-off-by: Ryan Roberts Reviewed-by: Yang Shi Cc: Matthew Wilcox (Oracle) Cc: Rik van Riel Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 10 ++++++++-- mm/mmap.c | 6 ++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 172b85208276e..94c958f7ebb50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -810,7 +810,7 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, { loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad, ret; + unsigned long len_pad, ret, off_sub; if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) return 0; @@ -839,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, if (ret == addr) return addr; - ret += (off - ret) & (size - 1); + off_sub = (off - ret) & (size - 1); + + if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown && + !off_sub) + return ret + size; + + ret += off_sub; return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index b78e83d351d28..d89770eaab6b6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. - * do_mmap() will clear pgoff, so match alignment. */ - pgoff = 0; get_area = shmem_get_unmapped_area; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; } + /* Always treat pgoff as zero for anonymous memory. */ + if (!file) + pgoff = 0; + addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; -- GitLab From 353d321f63f7dbfc9ef58498cc732c9fe886a596 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Jan 2024 20:08:11 +0200 Subject: [PATCH 0192/1405] wifi: iwlwifi: fix double-free bug The storage for the TLV PC register data wasn't done like all the other storage in the drv->fw area, which is cleared at the end of deallocation. Therefore, the freeing must also be done differently, explicitly NULL'ing it out after the free, since otherwise there's a nasty double-free bug here if a file fails to load after this has been parsed, and we get another free later (e.g. because no other file exists.) Fix that by adding the missing NULL assignment. Cc: stable@vger.kernel.org Fixes: 5e31b3df86ec ("wifi: iwlwifi: dbg: print pc register data once fw dump occurred") Reported-by: Guy Kaplan Signed-off-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240123200528.675f3c24ec0d.I6ab4015cd78d82dd95471f840629972ef0331de3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index ffe2670720c92..abf8001bdac17 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) kfree(drv->fw.ucode_capa.cmd_versions); kfree(drv->fw.phy_integration_ver); kfree(drv->trans->dbg.pc_data); + drv->trans->dbg.pc_data = NULL; for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) iwl_free_fw_img(drv, drv->fw.img + i); -- GitLab From b743287d7a0007493f5cada34ed2085d475050b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Jan 2024 09:51:09 +0100 Subject: [PATCH 0193/1405] wifi: cfg80211: fix wiphy delayed work queueing When a wiphy work is queued with timer, and then again without a delay, it's started immediately but *also* started again after the timer expires. This can lead, for example, to warnings in mac80211's offchannel code as reported by Jouni. Running the same work twice isn't expected, of course. Fix this by deleting the timer at this point, when queuing immediately due to delay=0. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Link: https://msgid.link/20240125095108.2feb0eaaa446.I4617f3210ed0e7f252290d5970dac6a876aa595b@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 409d74c57ca0d..3fb1b637352a9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } -- GitLab From 3a3ef3940798e85121066a859127e72a528dc32a Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 23 Jan 2024 20:08:19 +0200 Subject: [PATCH 0194/1405] wifi: iwlwifi: mvm: skip adding debugfs symlink for reconfig The function to add an interface may be called without a previous removal if the HW is being reconfigured. As such, only add the symlink if the hardware is not being reconfigured due to a HW_RESTART. Fixes: c36235acb34f ("wifi: iwlwifi: mvm: rework debugfs handling") Signed-off-by: Benjamin Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240123200528.314395eacda4.I5823e962c3c3674b942383733debd10b3fe903e2@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 6 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 7f13dff04b265..3447d67a8b311 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1600,7 +1600,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, */ if (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_ADHOC) { - iwl_mvm_vif_dbgfs_add_link(mvm, vif); + if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + iwl_mvm_vif_dbgfs_add_link(mvm, vif); ret = 0; goto out; } @@ -1640,7 +1641,8 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef); } - iwl_mvm_vif_dbgfs_add_link(mvm, vif); + if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + iwl_mvm_vif_dbgfs_add_link(mvm, vif); if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && vif->type == NL80211_IFTYPE_STATION && !vif->p2p && diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 61170173f917a..1f36e934ef69e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -81,7 +81,8 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS); } - iwl_mvm_vif_dbgfs_add_link(mvm, vif); + if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + iwl_mvm_vif_dbgfs_add_link(mvm, vif); if (!test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && vif->type == NL80211_IFTYPE_STATION && !vif->p2p && -- GitLab From 1b023d475ae928f3036cefee9ea0a499af1d8900 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 16 Jan 2024 21:05:25 -0600 Subject: [PATCH 0195/1405] wifi: mac80211: Drop WBRF debugging statements Due to the way that debugging is used in the mac80211 subsystem this message ends up being noisier than it needs to be. As the statement is only useful at a first stage of triage for BIOS bugs, just drop it. Cc: Jun Ma Suggested-by: Kalle Valo Signed-off-by: Mario Limonciello Tested-by: Kalle Valo Link: https://msgid.link/20240117030525.539-1-mario.limonciello@amd.com Signed-off-by: Johannes Berg --- net/mac80211/wbrf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c index a05c5b971789c..3a86123091373 100644 --- a/net/mac80211/wbrf.c +++ b/net/mac80211/wbrf.c @@ -23,8 +23,6 @@ void ieee80211_check_wbrf_support(struct ieee80211_local *local) return; local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev); - dev_dbg(dev, "WBRF is %s supported\n", - local->wbrf_supported ? "" : "not"); } static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end) -- GitLab From cc4b2dd95f0d1eba8c691b36e8f4d1795582f1ff Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 25 Jan 2024 20:00:39 +0800 Subject: [PATCH 0196/1405] erofs: fix infinite loop due to a race of filling compressed_bvecs I encountered a race issue after lengthy (~594647 secs) stress tests on a 64k-page arm64 VM with several 4k-block EROFS images. The timing is like below: z_erofs_try_inplace_io z_erofs_fill_bio_vec cmpxchg(&compressed_bvecs[].page, NULL, ..) [access bufvec] compressed_bvecs[] = *bvec; Previously, z_erofs_submit_queue() just accessed bufvec->page only, so other fields in bufvec didn't matter. After the subpage block support is landed, .offset and .end can be used too, but filling bufvec isn't an atomic operation which can cause inconsistency. Let's use a spinlock to keep the atomicity of each bufvec. More specifically, just reuse the existing spinlock `pcl->obj.lockref.lock` since it's rarely used (also it takes a short time if even used) as long as the pcluster has a reference. Fixes: 192351616a9d ("erofs: support I/O submission for sub-page compressed blocks") Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Sandeep Dhavale Link: https://lore.kernel.org/r/20240125120039.3228103-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 74 +++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 583c062cd0e42..c1c77166b30ff 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -563,21 +563,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; unsigned int i; - if (i_blocksize(fe->inode) != PAGE_SIZE) - return; - if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) + if (i_blocksize(fe->inode) != PAGE_SIZE || + fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) return; for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; void *t; /* mark pages just found for debugging */ - /* the compressed page was loaded before */ + /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { t = (void *)((unsigned long)page | 1); newpage = NULL; @@ -597,9 +595,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); t = (void *)((unsigned long)newpage | 1); } - - if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t)) + spin_lock(&pcl->obj.lockref.lock); + if (!pcl->compressed_bvecs[i].page) { + pcl->compressed_bvecs[i].page = t; + spin_unlock(&pcl->obj.lockref.lock); continue; + } + spin_unlock(&pcl->obj.lockref.lock); if (page) put_page(page); @@ -718,31 +720,25 @@ int erofs_init_managed_cache(struct super_block *sb) return 0; } -static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, - struct z_erofs_bvec *bvec) -{ - struct z_erofs_pcluster *const pcl = fe->pcl; - - while (fe->icur > 0) { - if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page, - NULL, bvec->page)) { - pcl->compressed_bvecs[fe->icur] = *bvec; - return true; - } - } - return false; -} - /* callers must be with pcluster lock held */ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct z_erofs_bvec *bvec, bool exclusive) { + struct z_erofs_pcluster *pcl = fe->pcl; int ret; if (exclusive) { /* give priority for inplaceio to use file pages first */ - if (z_erofs_try_inplace_io(fe, bvec)) + spin_lock(&pcl->obj.lockref.lock); + while (fe->icur > 0) { + if (pcl->compressed_bvecs[--fe->icur].page) + continue; + pcl->compressed_bvecs[fe->icur] = *bvec; + spin_unlock(&pcl->obj.lockref.lock); return 0; + } + spin_unlock(&pcl->obj.lockref.lock); + /* otherwise, check if it can be used as a bvpage */ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && !fe->candidate_bvpage) @@ -1423,23 +1419,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, { gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; - struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr; + struct z_erofs_bvec zbv; struct address_space *mapping; - struct page *page, *oldpage; + struct page *page; int justfound, bs = i_blocksize(f->inode); /* Except for inplace pages, the entire page can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: - oldpage = READ_ONCE(zbv->page); - if (!oldpage) + spin_lock(&pcl->obj.lockref.lock); + zbv = pcl->compressed_bvecs[nr]; + page = zbv.page; + justfound = (unsigned long)page & 1UL; + page = (struct page *)((unsigned long)page & ~1UL); + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); + if (!page) goto out_allocpage; - justfound = (unsigned long)oldpage & 1UL; - page = (struct page *)((unsigned long)oldpage & ~1UL); bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); /* * Handle preallocated cached pages. We tried to allocate such pages @@ -1448,7 +1447,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, */ if (page->private == Z_EROFS_PREALLOCATED_PAGE) { set_page_private(page, 0); - WRITE_ONCE(zbv->page, page); tocache = true; goto out_tocache; } @@ -1459,9 +1457,9 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { - if (zbv->offset < 0) - bvec->bv_offset = round_up(-zbv->offset, bs); - bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset; + if (zbv.offset < 0) + bvec->bv_offset = round_up(-zbv.offset, bs); + bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset; return; } @@ -1471,7 +1469,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, /* the cached page is still in managed cache */ if (page->mapping == mc) { - WRITE_ONCE(zbv->page, page); /* * The cached page is still available but without a valid * `->private` pcluster hint. Let's reconnect them. @@ -1503,11 +1500,15 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, put_page(page); out_allocpage: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); - if (oldpage != cmpxchg(&zbv->page, oldpage, page)) { + spin_lock(&pcl->obj.lockref.lock); + if (pcl->compressed_bvecs[nr].page) { erofs_pagepool_add(&f->pagepool, page); + spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || @@ -1685,6 +1686,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, if (cur + bvec.bv_len > end) bvec.bv_len = end - cur; + DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) goto submit_bio_retry; -- GitLab From 690811f0128eb6034e5fd011c3c54878839b4014 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:00:14 -0300 Subject: [PATCH 0197/1405] tools headers uapi: Sync linux/stat.h with the kernel sources to pick STATX_MNT_ID_UNIQUE To pick the changes from: 98d2b43081972abe ("add unique mount ID") That add STATX_MNT_ID_UNIQUE that was manually added to tools/perf/trace/beauty/statx.c, at some point this should move to the shell based automated way. This silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbJq08s19890WDo-@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/stat.h | 1 + tools/perf/trace/beauty/statx.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 7cab2c65d3d7f..2f2ee82d55175 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -154,6 +154,7 @@ struct statx { #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */ #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c index 5f5320f7c6e27..dc5943a6352d9 100644 --- a/tools/perf/trace/beauty/statx.c +++ b/tools/perf/trace/beauty/statx.c @@ -67,6 +67,7 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a P_FLAG(BTIME); P_FLAG(MNT_ID); P_FLAG(DIOALIGN); + P_FLAG(MNT_ID_UNIQUE); #undef P_FLAG -- GitLab From 174372668933ede569b9e56eab26971669a6a0a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:08:44 -0300 Subject: [PATCH 0198/1405] tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING To pick up the changes in: 765a0542fdc7aad7 ("x86/virt/tdx: Detect TDX during kernel boot") Addressing this tools/perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h That makes the beautification scripts to pick some new entries: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2024-01-25 11:08:12.363223880 -0300 +++ after 2024-01-25 11:08:24.839307699 -0300 @@ -21,6 +21,7 @@ [0x0000004f] = "PPIN", [0x00000060] = "LBR_CORE_TO", [0x00000079] = "IA32_UCODE_WRITE", + [0x00000087] = "IA32_MKTME_KEYID_PARTITIONING", [0x0000008b] = "AMD64_PATCH_LEVEL", [0x0000008C] = "IA32_SGXLEPUBKEYHASH0", [0x0000008D] = "IA32_SGXLEPUBKEYHASH1", $ Now one can trace systemwide asking to see backtraces to where that MSR is being read/written, see this example with a previous update: # perf trace -e msr:*_msr/max-stack=32/ --filter="msr==IA32_MKTME_KEYID_PARTITIONING" ^C# If we use -v (verbose mode) we can see what it does behind the scenes: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_MKTME_KEYID_PARTITIONING" Using CPUID GenuineIntel-6-8E-A 0x87 New filter for msr:read_msr: (msr==0x87) && (common_pid != 58627 && common_pid != 3792) 0x87 New filter for msr:write_msr: (msr==0x87) && (common_pid != 58627 && common_pid != 3792) mmap size 528384B ^C# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 Timer/2525383 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) futex_wait_queue_me ([kernel.kallsyms]) futex_wait ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __futex_abstimed_wait_common64 (/usr/lib64/libpthread-2.33.so) 0.030 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Cc: Adrian Hunter Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Kai Huang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbJt27rjkQVU1YoP@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1d51e1850ed03..f1bd7b91b3c63 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -237,6 +237,11 @@ #define LBR_INFO_CYCLES 0xffff #define LBR_INFO_BR_TYPE_OFFSET 56 #define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) +#define LBR_INFO_BR_CNTR_OFFSET 32 +#define LBR_INFO_BR_CNTR_NUM 4 +#define LBR_INFO_BR_CNTR_BITS 2 +#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) +#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) #define MSR_ARCH_LBR_CTL 0x000014ce #define ARCH_LBR_CTL_LBREN BIT(0) @@ -536,6 +541,9 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* KeyID partitioning between MKTME and TDX */ +#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 + /* * AMD64 MSRs. Not complete. See the architecture manual for a more * complete list. -- GitLab From b0dc99215598d7fc7c6903437d49f0b33d1ef899 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 25 Jan 2024 11:23:56 -0300 Subject: [PATCH 0199/1405] tools headers UAPI: Sync linux/fcntl.h with the kernel sources To get the changes in: 8a924db2d7b5eb69 ("fs: Pass AT_GETATTR_NOSEC flag to getattr interface function") That don't add anything that is handled by existing hard coded tables or table generation scripts. This silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stefan Berger Link: https://lore.kernel.org/lkml/ZbJv9fGF_k2xXEdr@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fcntl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 6c80f96049bd0..282e90aeb163c 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ -- GitLab From 2dac1f089add90a45d93fe8217938281532b86c7 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 25 Jan 2024 11:03:51 +0100 Subject: [PATCH 0200/1405] perf test: Fix 'perf script' tests on s390 In linux next repo, test case 'perf script tests' fails on s390. The root case is a command line invocation of 'perf record' with call-graph information. On s390 only DWARF formatted call-graphs are supported and only on software events. Change the command line parameters for s390. Output before: # perf test 89 89: perf script tests : FAILED! # Output after: # perf test 89 89: perf script tests : Ok # Fixes: 0dd5041c9a0eaf8c ("perf addr_location: Add init/exit/copy functions") Reviewed-by: Ian Rogers Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Namhyung Kim Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20240125100351.936262-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/script.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh index 5ae7bd0031a82..2973adab445d2 100755 --- a/tools/perf/tests/shell/script.sh +++ b/tools/perf/tests/shell/script.sh @@ -54,7 +54,14 @@ def sample_table(*args): def call_path_table(*args): print(f'call_path_table({args}') _end_of_file_ - perf record -g -o "${perfdatafile}" true + case $(uname -m) + in s390x) + cmd_flags="--call-graph dwarf -e cpu-clock";; + *) + cmd_flags="-g";; + esac + + perf record $cmd_flags -o "${perfdatafile}" true perf script -i "${perfdatafile}" -s "${db_test}" echo "DB test [Success]" } -- GitLab From 9d95c6be48fc8d3d622658da8fbd6d6787d5c2e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:11 -0800 Subject: [PATCH 0201/1405] perf list: Switch error message to pr_err() to respect debug settings (-v) Using printf() can interrupt 'perf list output', use pr_err() which can respect debug settings and the debug file. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index b0fc48be623f3..9e47712507cc2 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -66,7 +66,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus put_tracing_file(events_path); if (events_fd < 0) { - printf("Error: failed to open tracing events directory\n"); + pr_err("Error: failed to open tracing events directory\n"); return; } -- GitLab From 79bacb6ad73ce63faba3564671c2028e6c3fa1e2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:12 -0800 Subject: [PATCH 0202/1405] perf list: Add output file option Add an option to write the 'perf list' output to a specific file. This can avoid issues with debug output being written into the output stream. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 + tools/perf/builtin-list.c | 211 +++++++++++++++---------- 2 files changed, 133 insertions(+), 82 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 1b90575ee3c84..3b12595193c9f 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -47,6 +47,10 @@ Print PMU events and metrics limited to the specific PMU name. --json:: Output in JSON format. +-o:: +--output=:: + Output file name. By default output is written to stdout. + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 61c2c96cc0701..e27a1b1288c29 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -30,6 +30,8 @@ * functions. */ struct print_state { + /** @fp: File to write output to. */ + FILE *fp; /** * @pmu_glob: Optionally restrict PMU and metric matching to PMU or * debugfs subsystem name. @@ -66,13 +68,15 @@ static void default_print_start(void *ps) { struct print_state *print_state = ps; - if (!print_state->name_only && pager_in_use()) - printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); + if (!print_state->name_only && pager_in_use()) { + fprintf(print_state->fp, + "\nList of pre-defined events (to be used in -e or -M):\n\n"); + } } static void default_print_end(void *print_state __maybe_unused) {} -static void wordwrap(const char *s, int start, int max, int corr) +static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) { int column = start; int n; @@ -82,10 +86,10 @@ static void wordwrap(const char *s, int start, int max, int corr) int wlen = strcspn(s, " \t\n"); if ((column + wlen >= max && column > start) || saw_newline) { - printf("\n%*s", start, ""); + fprintf(fp, "\n%*s", start, ""); column = start + corr; } - n = printf("%s%.*s", column > start ? " " : "", wlen, s); + n = fprintf(fp, "%s%.*s", column > start ? " " : "", wlen, s); if (n <= 0) break; saw_newline = s[wlen] == '\n'; @@ -104,6 +108,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi { struct print_state *print_state = ps; int pos; + FILE *fp = print_state->fp; if (deprecated && !print_state->deprecated) return; @@ -119,30 +124,30 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi if (print_state->name_only) { if (event_alias && strlen(event_alias)) - printf("%s ", event_alias); + fprintf(fp, "%s ", event_alias); else - printf("%s ", event_name); + fprintf(fp, "%s ", event_name); return; } if (strcmp(print_state->last_topic, topic ?: "")) { if (topic) - printf("\n%s:\n", topic); + fprintf(fp, "\n%s:\n", topic); zfree(&print_state->last_topic); print_state->last_topic = strdup(topic ?: ""); } if (event_alias && strlen(event_alias)) - pos = printf(" %s OR %s", event_name, event_alias); + pos = fprintf(fp, " %s OR %s", event_name, event_alias); else - pos = printf(" %s", event_name); + pos = fprintf(fp, " %s", event_name); if (!topic && event_type_desc) { for (; pos < 53; pos++) - putchar(' '); - printf("[%s]\n", event_type_desc); + fputc(' ', fp); + fprintf(fp, "[%s]\n", event_type_desc); } else - putchar('\n'); + fputc('\n', fp); if (desc && print_state->desc) { char *desc_with_unit = NULL; @@ -155,22 +160,22 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi ? "%s. Unit: %s" : "%s Unit: %s", desc, pmu_name); } - printf("%*s", 8, "["); - wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); free(desc_with_unit); } long_desc = long_desc ?: desc; if (long_desc && print_state->long_desc) { - printf("%*s", 8, "["); - wordwrap(long_desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (print_state->detailed && encoding_desc) { - printf("%*s", 8, ""); - wordwrap(encoding_desc, 8, pager_get_columns(), 0); - putchar('\n'); + fprintf(fp, "%*s", 8, ""); + wordwrap(fp, encoding_desc, 8, pager_get_columns(), 0); + fputc('\n', fp); } } @@ -184,6 +189,7 @@ static void default_print_metric(void *ps, const char *unit __maybe_unused) { struct print_state *print_state = ps; + FILE *fp = print_state->fp; if (print_state->event_glob && (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) && @@ -192,27 +198,27 @@ static void default_print_metric(void *ps, if (!print_state->name_only && !print_state->last_metricgroups) { if (print_state->metricgroups) { - printf("\nMetric Groups:\n"); + fprintf(fp, "\nMetric Groups:\n"); if (!print_state->metrics) - putchar('\n'); + fputc('\n', fp); } else { - printf("\nMetrics:\n\n"); + fprintf(fp, "\nMetrics:\n\n"); } } if (!print_state->last_metricgroups || strcmp(print_state->last_metricgroups, group ?: "")) { if (group && print_state->metricgroups) { if (print_state->name_only) - printf("%s ", group); + fprintf(fp, "%s ", group); else if (print_state->metrics) { const char *gdesc = describe_metricgroup(group); if (gdesc) - printf("\n%s: [%s]\n", group, gdesc); + fprintf(fp, "\n%s: [%s]\n", group, gdesc); else - printf("\n%s:\n", group); + fprintf(fp, "\n%s:\n", group); } else - printf("%s\n", group); + fprintf(fp, "%s\n", group); } zfree(&print_state->last_metricgroups); print_state->last_metricgroups = strdup(group ?: ""); @@ -223,53 +229,59 @@ static void default_print_metric(void *ps, if (print_state->name_only) { if (print_state->metrics && !strlist__has_entry(print_state->visited_metrics, name)) { - printf("%s ", name); + fprintf(fp, "%s ", name); strlist__add(print_state->visited_metrics, name); } return; } - printf(" %s\n", name); + fprintf(fp, " %s\n", name); if (desc && print_state->desc) { - printf("%*s", 8, "["); - wordwrap(desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (long_desc && print_state->long_desc) { - printf("%*s", 8, "["); - wordwrap(long_desc, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, long_desc, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (expr && print_state->detailed) { - printf("%*s", 8, "["); - wordwrap(expr, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, expr, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } if (threshold && print_state->detailed) { - printf("%*s", 8, "["); - wordwrap(threshold, 8, pager_get_columns(), 0); - printf("]\n"); + fprintf(fp, "%*s", 8, "["); + wordwrap(fp, threshold, 8, pager_get_columns(), 0); + fprintf(fp, "]\n"); } } struct json_print_state { + /** @fp: File to write output to. */ + FILE *fp; /** Should a separator be printed prior to the next item? */ bool need_sep; }; -static void json_print_start(void *print_state __maybe_unused) +static void json_print_start(void *ps) { - printf("[\n"); + struct json_print_state *print_state = ps; + FILE *fp = print_state->fp; + + fprintf(fp, "[\n"); } static void json_print_end(void *ps) { struct json_print_state *print_state = ps; + FILE *fp = print_state->fp; - printf("%s]\n", print_state->need_sep ? "\n" : ""); + fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : ""); } -static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) +static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ...) { va_list args; @@ -318,7 +330,7 @@ static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...) } } va_end(args); - fputs(buf->buf, stdout); + fputs(buf->buf, fp); } static void json_print_event(void *ps, const char *pmu_name, const char *topic, @@ -330,60 +342,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, { struct json_print_state *print_state = ps; bool need_sep = false; + FILE *fp = print_state->fp; struct strbuf buf; strbuf_init(&buf, 0); - printf("%s{\n", print_state->need_sep ? ",\n" : ""); + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (pmu_name) { - fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name); + fix_escape_fprintf(fp, &buf, "\t\"Unit\": \"%S\"", pmu_name); need_sep = true; } if (topic) { - fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic); + fix_escape_fprintf(fp, &buf, "%s\t\"Topic\": \"%S\"", + need_sep ? ",\n" : "", + topic); need_sep = true; } if (event_name) { - fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "", - event_name); + fix_escape_fprintf(fp, &buf, "%s\t\"EventName\": \"%S\"", + need_sep ? ",\n" : "", + event_name); need_sep = true; } if (event_alias && strlen(event_alias)) { - fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "", - event_alias); + fix_escape_fprintf(fp, &buf, "%s\t\"EventAlias\": \"%S\"", + need_sep ? ",\n" : "", + event_alias); need_sep = true; } if (scale_unit && strlen(scale_unit)) { - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", - scale_unit); + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", + need_sep ? ",\n" : "", + scale_unit); need_sep = true; } if (event_type_desc) { - fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "", - event_type_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"EventType\": \"%S\"", + need_sep ? ",\n" : "", + event_type_desc); need_sep = true; } if (deprecated) { - fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "", - deprecated ? "1" : "0"); + fix_escape_fprintf(fp, &buf, "%s\t\"Deprecated\": \"%S\"", + need_sep ? ",\n" : "", + deprecated ? "1" : "0"); need_sep = true; } if (desc) { - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", - desc); + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", + need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", - long_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", + need_sep ? ",\n" : "", + long_desc); need_sep = true; } if (encoding_desc) { - fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "", - encoding_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"Encoding\": \"%S\"", + need_sep ? ",\n" : "", + encoding_desc); need_sep = true; } - printf("%s}", need_sep ? "\n" : ""); + fprintf(fp, "%s}", need_sep ? "\n" : ""); strbuf_release(&buf); } @@ -394,43 +417,53 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, { struct json_print_state *print_state = ps; bool need_sep = false; + FILE *fp = print_state->fp; struct strbuf buf; strbuf_init(&buf, 0); - printf("%s{\n", print_state->need_sep ? ",\n" : ""); + fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : ""); print_state->need_sep = true; if (group) { - fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group); + fix_escape_fprintf(fp, &buf, "\t\"MetricGroup\": \"%S\"", group); need_sep = true; } if (name) { - fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricName\": \"%S\"", + need_sep ? ",\n" : "", + name); need_sep = true; } if (expr) { - fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricExpr\": \"%S\"", + need_sep ? ",\n" : "", + expr); need_sep = true; } if (threshold) { - fix_escape_printf(&buf, "%s\t\"MetricThreshold\": \"%S\"", need_sep ? ",\n" : "", - threshold); + fix_escape_fprintf(fp, &buf, "%s\t\"MetricThreshold\": \"%S\"", + need_sep ? ",\n" : "", + threshold); need_sep = true; } if (unit) { - fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit); + fix_escape_fprintf(fp, &buf, "%s\t\"ScaleUnit\": \"%S\"", + need_sep ? ",\n" : "", + unit); need_sep = true; } if (desc) { - fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "", - desc); + fix_escape_fprintf(fp, &buf, "%s\t\"BriefDescription\": \"%S\"", + need_sep ? ",\n" : "", + desc); need_sep = true; } if (long_desc) { - fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "", - long_desc); + fix_escape_fprintf(fp, &buf, "%s\t\"PublicDescription\": \"%S\"", + need_sep ? ",\n" : "", + long_desc); need_sep = true; } - printf("%s}", need_sep ? "\n" : ""); + fprintf(fp, "%s}", need_sep ? "\n" : ""); strbuf_release(&buf); } @@ -449,8 +482,12 @@ static bool default_skip_duplicate_pmus(void *ps) int cmd_list(int argc, const char **argv) { int i, ret = 0; - struct print_state default_ps = {}; - struct print_state json_ps = {}; + struct print_state default_ps = { + .fp = stdout, + }; + struct print_state json_ps = { + .fp = stdout, + }; void *ps = &default_ps; struct print_callbacks print_cb = { .print_start = default_print_start, @@ -461,6 +498,7 @@ int cmd_list(int argc, const char **argv) }; const char *cputype = NULL; const char *unit_name = NULL; + const char *output_path = NULL; bool json = false; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"), @@ -471,6 +509,7 @@ int cmd_list(int argc, const char **argv) "Print longer event descriptions."), OPT_BOOLEAN(0, "details", &default_ps.detailed, "Print information on the perf event names and expressions used internally by events."), + OPT_STRING('o', "output", &output_path, "file", "output file name"), OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated, "Print deprecated events."), OPT_STRING(0, "cputype", &cputype, "cpu type", @@ -497,6 +536,11 @@ int cmd_list(int argc, const char **argv) argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (output_path) { + default_ps.fp = fopen(output_path, "w"); + json_ps.fp = default_ps.fp; + } + setup_pager(); if (!default_ps.name_only) @@ -618,5 +662,8 @@ int cmd_list(int argc, const char **argv) free(default_ps.last_topic); free(default_ps.last_metricgroups); strlist__delete(default_ps.visited_metrics); + if (output_path) + fclose(default_ps.fp); + return ret; } -- GitLab From a734c7f969750302b1150ef20468704498f74e64 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:13 -0800 Subject: [PATCH 0203/1405] perf test: Workaround debug output in list test Write the JSON output to a specific file to avoid debug output breaking it. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/list.sh | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 22b004f2b23ec..8a868ae64560e 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -3,17 +3,32 @@ # SPDX-License-Identifier: GPL-2.0 set -e -err=0 shelldir=$(dirname "$0") # shellcheck source=lib/setup_python.sh . "${shelldir}"/lib/setup_python.sh +list_output=$(mktemp /tmp/__perf_test.list_output.json.XXXXX) + +cleanup() { + rm -f "${list_output}" + + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + test_list_json() { echo "Json output test" - perf list -j | $PYTHON -m json.tool + perf list -j -o "${list_output}" + $PYTHON -m json.tool "${list_output}" echo "Json output test [Success]" } test_list_json -exit $err +cleanup +exit 0 -- GitLab From 1c2124ec8431f88f5c122be73cb4b784b558d600 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:14 -0800 Subject: [PATCH 0204/1405] perf test shell script: Fix test for python being disabled "grep -cv" can exit with an error code that causes the "set -e" to abort the script. Switch to using the grep exit code in the if condition to avoid this. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/script.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh index 2973adab445d2..fa4d71e2e72a6 100755 --- a/tools/perf/tests/shell/script.sh +++ b/tools/perf/tests/shell/script.sh @@ -36,8 +36,7 @@ test_db() echo "DB test" # Check if python script is supported - libpython=$(perf version --build-options | grep python | grep -cv OFF) - if [ "${libpython}" != "1" ] ; then + if perf version --build-options | grep python | grep -q OFF ; then echo "SKIP: python scripting is not supported" err=2 return -- GitLab From 9a8dd2f24d1cb8e8df2c5d18b4973298db1006e1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Jan 2024 20:30:15 -0800 Subject: [PATCH 0205/1405] perf test shell daemon: Make signal test less racy The daemon signal test sends signals and then expects files to be written. It was observed on an Intel Alderlake that the signals were sent too quickly leading to the 3 expected files not appearing. To avoid this send the next signal only after the expected previous file has appeared. To avoid an infinite loop the number of retries is limited. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Rajeev Cc: Ingo Molnar Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ross Zwisler Cc: Shirisha G Link: https://lore.kernel.org/r/20240124043015.1388867-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 34 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 4c598cfc5afa1..e5fa8d6f9eb1f 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -414,16 +414,30 @@ EOF # start daemon daemon_start ${config} test - # send 2 signals - perf daemon signal --config ${config} --session test - perf daemon signal --config ${config} - - # stop daemon - daemon_exit ${config} - - # count is 2 perf.data for signals and 1 for perf record finished - count=`ls ${base}/session-test/*perf.data* | wc -l` - if [ ${count} -ne 3 ]; then + # send 2 signals then exit. Do this in a loop watching the number of + # files to avoid races. If the loop retries more than 600 times then + # give up. + local retries=0 + local signals=0 + local success=0 + while [ ${retries} -lt 600 ] && [ ${success} -eq 0 ]; do + local files + files=`ls ${base}/session-test/*perf.data* 2> /dev/null | wc -l` + if [ ${signals} -eq 0 ]; then + perf daemon signal --config ${config} --session test + signals=1 + elif [ ${signals} -eq 1 ] && [ $files -ge 1 ]; then + perf daemon signal --config ${config} + signals=2 + elif [ ${signals} -eq 2 ] && [ $files -ge 2 ]; then + daemon_exit ${config} + signals=3 + elif [ ${signals} -eq 3 ] && [ $files -ge 3 ]; then + success=1 + fi + retries=$((${retries} +1)) + done + if [ ${success} -eq 0 ]; then error=1 echo "FAILED: perf data no generated" fi -- GitLab From 1233d1d54b7f66813cfa748aaaeca8c4f9c36c6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Jan 2024 11:00:01 -0300 Subject: [PATCH 0206/1405] tools headers UAPI: Update tools's copy of drm.h headers to pick DRM_IOCTL_MODE_CLOSEFB Picking the changes from: 8570c27932e132d2 ("drm/syncobj: Add deadline support for syncobj waits") 9724ed6c1b1212d1 ("drm: Introduce DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT") e4d983acffff270c ("drm: introduce DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP") d208d875667e2a29 ("drm: introduce CLOSEFB IOCTL") afa5cf3175a22b71 ("drm/i915/uapi: fix typos/spellos and punctuation") Addressing these perf build warnings: Warning: Kernel ABI header differences: Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl command into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2024-01-26 10:54:23.486381862 -0300 +++ after 2024-01-26 10:54:35.767902442 -0300 @@ -109,6 +109,7 @@ [0xCD] = "SYNCOBJ_TIMELINE_SIGNAL", [0xCE] = "MODE_GETFB2", [0xCF] = "SYNCOBJ_EVENTFD", + [0xD0] = "MODE_CLOSEFB", [DRM_COMMAND_BASE + 0x00] = "I915_INIT", [DRM_COMMAND_BASE + 0x01] = "I915_FLUSH", [DRM_COMMAND_BASE + 0x02] = "I915_FLIP", $ Cc: Adrian Hunter Cc: Dmitry Baryshkov Cc: Ian Rogers Cc: Javier Martinez Canillas Cc: Jiri Olsa Cc: Namhyung Kim Cc: Randy Dunlap Cc: Rob Clark Cc: Simon Ser Cc: Tvrtko Ursulin Cc: Zack Rusin Link: https://lore.kernel.org/lkml/ZbPIN9Dcc5AM0uxo@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 72 ++++++++++++++++++++++++++++++- tools/include/uapi/drm/i915_drm.h | 12 +++--- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index de723566c5ae8..16122819edfef 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -713,7 +713,8 @@ struct drm_gem_open { /** * DRM_CAP_ASYNC_PAGE_FLIP * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 /** @@ -773,6 +774,13 @@ struct drm_gem_open { * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 /* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -842,6 +850,31 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. + * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. + * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + /* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; @@ -893,6 +926,7 @@ struct drm_syncobj_transfer { #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -901,6 +935,14 @@ struct drm_syncobj_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; }; struct drm_syncobj_timeline_wait { @@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait { __u32 flags; __u32 first_signaled; /* only valid when not waiting all */ __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; }; /** @@ -1218,6 +1268,26 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 218edb0a96f8c..fd4f9574d177a 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_FENCE 44 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture - * user specified bufffers for post-mortem debugging of GPU hangs. See + * user-specified buffers for post-mortem debugging of GPU hangs. See * EXEC_OBJECT_CAPTURE. */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 @@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy { * is accurate. * * The returned dword is split into two fields to indicate both - * the engine classess on which the object is being read, and the + * the engine classes on which the object is being read, and the * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written @@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise { __u32 handle; /* Advice: either the buffer will be needed again in the near future, - * or wont be and could be discarded under memory pressure. + * or won't be and could be discarded under memory pressure. */ __u32 madv; @@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info { * // enough to hold our array of engines. The kernel will fill out the * // item.length for us, which is the number of bytes we need. * // - * // Alternatively a large buffer can be allocated straight away enabling + * // Alternatively a large buffer can be allocated straightaway enabling * // querying in one pass, in which case item.length should contain the * // length of the provided buffer. * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); @@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info { * // Now that we allocated the required number of bytes, we call the ioctl * // again, this time with the data_ptr pointing to our newly allocated * // blob, which the kernel can then populate with info on all engines. - * item.data_ptr = (uintptr_t)&info, + * item.data_ptr = (uintptr_t)&info; * * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); * if (err) ... @@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info { /** * struct drm_i915_engine_info * - * Describes one engine and it's capabilities as known to the driver. + * Describes one engine and its capabilities as known to the driver. */ struct drm_i915_engine_info { /** @engine: Engine class and instance. */ -- GitLab From 39af67413997a350fa02a88e15eaacf202c94884 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 22 Jan 2024 16:04:06 +0800 Subject: [PATCH 0207/1405] perf build: Make minimal shellcheck version to v0.6.0 The perf build failed due to the shellcheck on my machine (v0.4.6 on Ubuntu 18.04.1 LTS) doesn't support -a/--check-sourced and -S/--severity option. These two options are introduced in shellcheck v0.4.7 and v0.6.0 respectively. So restrict the minimal version of shellcheck to v0.6.0. Fixes: b809fc656e763296 ("perf build: Shellcheck support for OUTPUT directory") Reviewed-by: Ian Rogers Signed-off-by: Yicong Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Junhao He Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/20240122080406.28678-1-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 27e7c478880fd..f8774a9b1377a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -236,6 +236,16 @@ else SHELLCHECK := $(shell which shellcheck 2> /dev/null) endif +# shellcheck is using in tools/perf/tests/Build with option -a/--check-sourced ( +# introduced in v0.4.7) and -S/--severity (introduced in v0.6.0). So make the +# minimal shellcheck version as v0.6.0. +ifneq ($(SHELLCHECK),) + ifeq ($(shell expr $(shell $(SHELLCHECK) --version | grep version: | \ + sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 060), 1) + SHELLCHECK := + endif +endif + export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK -- GitLab From 79baac8acfc60a7a5114f6d60731e28c242ef8ce Mon Sep 17 00:00:00 2001 From: Sun Haiyong Date: Mon, 4 Dec 2023 16:20:55 +0800 Subject: [PATCH 0208/1405] perf top: Remove needless malloc(0) call that triggers -Walloc-size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 14 introduces a new -Walloc-size included in -Wextra which errors out like: builtin-top.c: In function ‘prompt_integer’: builtin-top.c:360:21: error: allocation of insufficient size ‘0’ for type ‘char’ with size ‘1’ [-Werror=alloc-size] 360 | char *buf = malloc(0), *p; | ^~~~~~ Just set it to NULL, getline() will do the allocation. Signed-off-by: Sun Haiyong Signed-off-by: Yanteng Si Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231204082055.91877-1-siyanteng@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index baf1ab083436e..5301d1badd435 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -357,7 +357,7 @@ static void perf_top__print_sym_table(struct perf_top *top) static void prompt_integer(int *target, const char *msg) { - char *buf = malloc(0), *p; + char *buf = NULL, *p; size_t dummy = 0; int tmp; -- GitLab From eeca59a6e8e610e593d17e12f67324c615ec5ef2 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 25 Jan 2024 23:54:57 +0300 Subject: [PATCH 0209/1405] ALSA: usb-audio: Support read-only clock selector control Clock selector control might be read-only. Add corresponding checks to prevent sending control requests that would fail. Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240125205457.28258-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 94e4aaeafe588..7b259641adb51 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -261,6 +261,8 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int ret, i, cur, err, pins, clock_id; const u8 *sources; int proto = fmt->protocol; + bool readable, writeable; + u32 bmControls; entity_id &= 0xff; @@ -292,11 +294,27 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, sources = GET_VAL(selector, proto, baCSourceID); cur = 0; + if (proto == UAC_VERSION_3) + bmControls = le32_to_cpu(*(__le32 *)(&selector->v3.baCSourceID[0] + pins)); + else + bmControls = *(__u8 *)(&selector->v2.baCSourceID[0] + pins); + + readable = uac_v2v3_control_is_readable(bmControls, + UAC2_CX_CLOCK_SELECTOR); + writeable = uac_v2v3_control_is_writeable(bmControls, + UAC2_CX_CLOCK_SELECTOR); + if (pins == 1) { ret = 1; goto find_source; } + /* for now just warn about buggy device */ + if (!readable) + usb_audio_warn(chip, + "%s(): clock selector control is not readable, id %d\n", + __func__, clock_id); + /* the entity ID we are looking at is a selector. * find out what it currently selects */ ret = uac_clock_selector_get_val(chip, clock_id); @@ -326,7 +344,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (ret > 0) { /* Skip setting clock selector again for some devices */ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || - pins == 1) + pins == 1 || !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) @@ -337,6 +355,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, return ret; find_others: + if (!writeable) + return -ENXIO; + /* The current clock source is invalid, try others. */ for (i = 1; i <= pins; i++) { if (i == cur) -- GitLab From 7bbe8f0071dfa23fcc3b2864ec9f3b1aeb7ab2df Mon Sep 17 00:00:00 2001 From: Sun Haiyong Date: Sat, 6 Jan 2024 17:41:29 +0800 Subject: [PATCH 0210/1405] perf tools: Fix calloc() arguments to address error introduced in gcc-14 the definition of calloc is as follows: void *calloc(size_t nmemb, size_t size); number of members is in the first parameter and the size is in the second parameter. Fix error messages on gcc 14 20240102: error: 'calloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] Committer notes: I noticed this on fedora 40 and rawhide. Signed-off-by: Sun Haiyong Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20240106094129.3337057-1-siyanteng@loongson.cn Signed-off-by: Yanteng Si Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/util/hist.c | 4 ++-- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/synthetic-events.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 91e6828c38cc2..86c9101251726 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -4080,8 +4080,8 @@ int cmd_record(int argc, const char **argv) } if (rec->switch_output.num_files) { - rec->switch_output.filenames = calloc(sizeof(char *), - rec->switch_output.num_files); + rec->switch_output.filenames = calloc(rec->switch_output.num_files, + sizeof(char *)); if (!rec->switch_output.filenames) { err = -EINVAL; goto out_opts; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0888b7163b7cc..fa359180ebf8f 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -491,8 +491,8 @@ static int hist_entry__init(struct hist_entry *he, } if (symbol_conf.res_sample) { - he->res_samples = calloc(sizeof(struct res_sample), - symbol_conf.res_sample); + he->res_samples = calloc(symbol_conf.res_sample, + sizeof(struct res_sample)); if (!he->res_samples) goto err_srcline; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ca3e0404f1872..966cca5a3e88c 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -286,7 +286,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, *out_metric_events = NULL; ids_size = hashmap__size(ids); - metric_events = calloc(sizeof(void *), ids_size + 1); + metric_events = calloc(ids_size + 1, sizeof(void *)); if (!metric_events) return -ENOMEM; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3712186353fb9..2a0289c149599 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1055,11 +1055,11 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr > n) thread_nr = n; - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + synthesize_threads = calloc(thread_nr, sizeof(pthread_t)); if (synthesize_threads == NULL) goto free_dirent; - args = calloc(sizeof(*args), thread_nr); + args = calloc(thread_nr, sizeof(*args)); if (args == NULL) goto free_threads; -- GitLab From a7a6a01f88e87dec4bf2365571dd2dc7403d52d0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Jan 2024 12:14:30 +0100 Subject: [PATCH 0211/1405] x86/efistub: Give up if memory attribute protocol returns an error The recently introduced EFI memory attributes protocol should be used if it exists to ensure that the memory allocation created for the kernel permits execution. This is needed for compatibility with tightened requirements related to Windows logo certification for x86 PCs. Currently, we simply strip the execute protect (XP) attribute from the entire range, but this might be rejected under some firmware security policies, and so in a subsequent patch, this will be changed to only strip XP from the executable region that runs early, and make it read-only (RO) as well. In order to catch any issues early, ensure that the memory attribute protocol works as intended, and give up if it produces spurious errors. Note that the DXE services based fallback was always based on best effort, so don't propagate any errors returned by that API. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 24 ++++++++++++++---------- drivers/firmware/efi/libstub/x86-stub.h | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 0d510c9a06a45..cb0be88c81317 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -223,8 +223,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size) +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size) { efi_status_t status; efi_gcd_memory_space_desc_t desc; @@ -236,13 +236,17 @@ void efi_adjust_memory_range_protection(unsigned long start, rounded_end = roundup(start + size, EFI_PAGE_SIZE); if (memattr != NULL) { - efi_call_proto(memattr, clear_memory_attributes, rounded_start, - rounded_end - rounded_start, EFI_MEMORY_XP); - return; + status = efi_call_proto(memattr, clear_memory_attributes, + rounded_start, + rounded_end - rounded_start, + EFI_MEMORY_XP); + if (status != EFI_SUCCESS) + efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); + return status; } if (efi_dxe_table == NULL) - return; + return EFI_SUCCESS; /* * Don't modify memory region attributes, they are @@ -255,7 +259,7 @@ void efi_adjust_memory_range_protection(unsigned long start, status = efi_dxe_call(get_memory_space_descriptor, start, &desc); if (status != EFI_SUCCESS) - return; + break; next = desc.base_address + desc.length; @@ -280,8 +284,10 @@ void efi_adjust_memory_range_protection(unsigned long start, unprotect_start, unprotect_start + unprotect_size, status); + break; } } + return EFI_SUCCESS; } static void setup_unaccepted_memory(void) @@ -805,9 +811,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) *kernel_entry = addr + entry; - efi_adjust_memory_range_protection(addr, kernel_total_size); - - return EFI_SUCCESS; + return efi_adjust_memory_range_protection(addr, kernel_total_size); } static void __noreturn enter_kernel(unsigned long kernel_addr, diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h index 37c5a36b9d8cf..1c20e99a64944 100644 --- a/drivers/firmware/efi/libstub/x86-stub.h +++ b/drivers/firmware/efi/libstub/x86-stub.h @@ -5,8 +5,8 @@ extern void trampoline_32bit_src(void *, bool); extern const u16 trampoline_ljmp_imm_offset; -void efi_adjust_memory_range_protection(unsigned long start, - unsigned long size); +efi_status_t efi_adjust_memory_range_protection(unsigned long start, + unsigned long size); #ifdef CONFIG_X86_64 efi_status_t efi_setup_5level_paging(void); -- GitLab From 47c5dd66c1840524572dcdd956f4af2bdb6fbdff Mon Sep 17 00:00:00 2001 From: Guixin Liu Date: Fri, 26 Jan 2024 16:26:43 +0800 Subject: [PATCH 0212/1405] nvmet-tcp: fix nvme tcp ida memory leak The nvmet_tcp_queue_ida should be destroy when the nvmet-tcp module exit. Signed-off-by: Guixin Liu Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index da0469978d6f4..c8655fc5aa5b8 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -2216,6 +2216,7 @@ static void __exit nvmet_tcp_exit(void) flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); + ida_destroy(&nvmet_tcp_queue_ida); } module_init(nvmet_tcp_init); -- GitLab From 00aab7dcb2267f2aef59447602f34501efe1a07f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Jan 2024 18:09:01 +0100 Subject: [PATCH 0213/1405] HID: i2c-hid-of: fix NULL-deref on failed power up A while back the I2C HID implementation was split in an ACPI and OF part, but the new OF driver never initialises the client pointer which is dereferenced on power-up failures. Fixes: b33752c30023 ("HID: i2c-hid: Reorganize so ACPI and OF are separate modules") Cc: stable@vger.kernel.org # 5.12 Cc: Douglas Anderson Signed-off-by: Johan Hovold Reviewed-by: Douglas Anderson Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid-of.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index c4e1fa0273c84..8be4d576da773 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client) if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; -- GitLab From 97aab852c4b9e1b378de48a55f8c9b8d76c36ccc Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 22 Jan 2024 07:49:52 -0800 Subject: [PATCH 0214/1405] hwmon: gigabyte_waterforce: Fix locking bug in waterforce_get_status() Goto 'unlock_and_return' for unlocking before returning on the error path. Fixes: d5939a793693 ("hwmon: Add driver for Gigabyte AORUS Waterforce AIO coolers") Signed-off-by: Harshit Mogalapalli Reviewed-by: Aleksa Savic Link: https://lore.kernel.org/r/20240122154952.2851934-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gigabyte_waterforce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/gigabyte_waterforce.c b/drivers/hwmon/gigabyte_waterforce.c index 85e5237757142..8129d7b3ceaf9 100644 --- a/drivers/hwmon/gigabyte_waterforce.c +++ b/drivers/hwmon/gigabyte_waterforce.c @@ -146,7 +146,7 @@ static int waterforce_get_status(struct waterforce_data *priv) /* Send command for getting status */ ret = waterforce_write_expanded(priv, get_status_cmd, GET_STATUS_CMD_LENGTH); if (ret < 0) - return ret; + goto unlock_and_return; ret = wait_for_completion_interruptible_timeout(&priv->status_report_received, msecs_to_jiffies(STATUS_VALIDITY)); -- GitLab From fcf67d82b8b878bdd95145382be43927bce07ec6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 26 Jan 2024 16:32:36 +0100 Subject: [PATCH 0215/1405] selftests: net: add missing config for big tcp tests The big_tcp test-case requires a few kernel knobs currently not specified in the net selftests config, causing the following failure: # selftests: net: big_tcp.sh # Error: Failed to load TC action module. # We have an error talking to the kernel ... # Testing for BIG TCP: # CLI GSO | GW GRO | GW GSO | SER GRO # ./big_tcp.sh: line 107: test: !=: unary operator expected ... # on on on on : [FAIL_on_link1] Add the missing configs Fixes: 6bb382bcf742 ("selftests: add a selftest for big tcp") Signed-off-by: Paolo Abeni Acked-by: Aaron Conole Acked-by: Xin Long Link: https://lore.kernel.org/all/21630ecea872fea13f071342ac64ef52a991a9b5.1706282943.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 19ff750516609..413ab9abcf1bf 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -29,7 +29,9 @@ CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_RAW=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m @@ -45,6 +47,8 @@ CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_U32=m @@ -55,6 +59,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m -- GitLab From 0958b33ef5a04ed91f61cef4760ac412080c4e08 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 0216/1405] tracing/trigger: Fix to return error if failed to alloc snapshot Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 46439e3bcec4d..b33c3861fbbbf 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } -- GitLab From c60fe56c169eb552113a4711db3a5f99e3acd4c1 Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Fri, 26 Jan 2024 23:57:14 +0300 Subject: [PATCH 0217/1405] hwmon: (pmbus/mp2975) Fix driver initialization for MP2975 device The commit 1feb31e810b0 ("hwmon: (pmbus/mp2975) Simplify VOUT code") has introduced a bug that makes it impossible to initialize MP2975 device: """ mp2975 5-0020: Failed to identify chip capabilities i2c i2c-5: new_device: Instantiated device mp2975 at 0x20 i2c i2c-5: delete_device: Deleting device mp2975 at 0x20 """ Since the 'read_byte_data' function was removed from the 'pmbus_driver_info ' structure the driver no longer reports correctly that VOUT mode is direct. Therefore 'pmbus_identify_common' fails with error, making it impossible to initialize the device. Restore 'read_byte_data' function to fix the issue. Tested: - before: it is not possible to initialize MP2975 device with the 'mp2975' driver, - after: 'mp2975' correctly initializes MP2975 device and all sensor data is correct. Fixes: 1feb31e810b0 ("hwmon: (pmbus/mp2975) Simplify VOUT code") Signed-off-by: Konstantin Aladyshev Link: https://lore.kernel.org/r/20240126205714.2363-1-aladyshev22@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/mp2975.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index b9bb469e2d8fe..5bbfdacb61a76 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -126,6 +126,22 @@ static const struct regulator_desc __maybe_unused mp2975_reg_desc[] = { #define to_mp2975_data(x) container_of(x, struct mp2975_data, info) +static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg) +{ + switch (reg) { + case PMBUS_VOUT_MODE: + /* + * Enforce VOUT direct format, since device allows to set the + * different formats for the different rails. Conversion from + * VID to direct provided by driver internally, in case it is + * necessary. + */ + return PB_VOUT_MODE_DIRECT; + default: + return -ENODATA; + } +} + static int mp2975_read_word_helper(struct i2c_client *client, int page, int phase, u8 reg, u16 mask) @@ -869,6 +885,7 @@ static struct pmbus_driver_info mp2975_info = { PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP | PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | PMBUS_PHASE_VIRTUAL, + .read_byte_data = mp2975_read_byte_data, .read_word_data = mp2975_read_word_data, #if IS_ENABLED(CONFIG_SENSORS_MP2975_REGULATOR) .num_regulators = 1, -- GitLab From d0266d7ab1618482d58015d67a5220e590333298 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 23 Jan 2024 17:00:53 +0100 Subject: [PATCH 0218/1405] Revert "power: supply: qcom_battmgr: Register the power supplies after PDR is up" This reverts commit b43f7ddc2b7a5a90447d96cb4d3c6d142dd4a810. The offending commit deferred power-supply class device registration until the service-started notification is received. This triggers a NULL pointer dereference during boot of the Lenovo ThinkPad X13s and SC8280XP CRD as battery status notifications can be received before the service-start notification: Unable to handle kernel NULL pointer dereference at virtual address 00000000000005c0 ... Call trace: _acquire+0x338/0x2064 acquire+0x1e8/0x318 spin_lock_irqsave+0x60/0x88 _supply_changed+0x2c/0xa4 battmgr_callback+0x1d4/0x60c [qcom_battmgr] pmic_glink_rpmsg_callback+0x5c/0xa4 [pmic_glink] qcom_glink_native_rx+0x58c/0x7e8 qcom_glink_smem_intr+0x14/0x24 [qcom_glink_smem] __handle_irq_event_percpu+0xb0/0x2d4 handle_irq_event+0x4c/0xb8 As trying to serialise this is non-trivial and risks missing notifications, let's revert to registration during probe so that the driver data is all set up once the service goes live. The warning message during resume in case the aDSP firmware is not running that motivated the change can be considered a feature and should not be suppressed. Fixes: b43f7ddc2b7a ("power: supply: qcom_battmgr: Register the power supplies after PDR is up") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240123160053.18331-1-johan+linaro@kernel.org Signed-off-by: Sebastian Reichel --- drivers/power/supply/qcom_battmgr.c | 109 +++++++++++++--------------- 1 file changed, 49 insertions(+), 60 deletions(-) diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index a12e2a66d516f..ec163d1bcd189 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -282,7 +282,6 @@ struct qcom_battmgr_wireless { struct qcom_battmgr { struct device *dev; - struct auxiliary_device *adev; struct pmic_glink_client *client; enum qcom_battmgr_variant variant; @@ -1294,69 +1293,11 @@ static void qcom_battmgr_enable_worker(struct work_struct *work) dev_err(battmgr->dev, "failed to request power notifications\n"); } -static char *qcom_battmgr_battery[] = { "battery" }; - -static void qcom_battmgr_register_psy(struct qcom_battmgr *battmgr) -{ - struct power_supply_config psy_cfg_supply = {}; - struct auxiliary_device *adev = battmgr->adev; - struct power_supply_config psy_cfg = {}; - struct device *dev = &adev->dev; - - psy_cfg.drv_data = battmgr; - psy_cfg.of_node = adev->dev.of_node; - - psy_cfg_supply.drv_data = battmgr; - psy_cfg_supply.of_node = adev->dev.of_node; - psy_cfg_supply.supplied_to = qcom_battmgr_battery; - psy_cfg_supply.num_supplicants = 1; - - if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { - battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - dev_err(dev, "failed to register battery power supply (%ld)\n", - PTR_ERR(battmgr->bat_psy)); - - battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->ac_psy)) - dev_err(dev, "failed to register AC power supply (%ld)\n", - PTR_ERR(battmgr->ac_psy)); - - battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - dev_err(dev, "failed to register USB power supply (%ld)\n", - PTR_ERR(battmgr->usb_psy)); - - battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - dev_err(dev, "failed to register wireless charing power supply (%ld)\n", - PTR_ERR(battmgr->wls_psy)); - } else { - battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); - if (IS_ERR(battmgr->bat_psy)) - dev_err(dev, "failed to register battery power supply (%ld)\n", - PTR_ERR(battmgr->bat_psy)); - - battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->usb_psy)) - dev_err(dev, "failed to register USB power supply (%ld)\n", - PTR_ERR(battmgr->usb_psy)); - - battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); - if (IS_ERR(battmgr->wls_psy)) - dev_err(dev, "failed to register wireless charing power supply (%ld)\n", - PTR_ERR(battmgr->wls_psy)); - } -} - static void qcom_battmgr_pdr_notify(void *priv, int state) { struct qcom_battmgr *battmgr = priv; if (state == SERVREG_SERVICE_STATE_UP) { - if (!battmgr->bat_psy) - qcom_battmgr_register_psy(battmgr); - battmgr->service_up = true; schedule_work(&battmgr->enable_work); } else { @@ -1371,9 +1312,13 @@ static const struct of_device_id qcom_battmgr_of_variants[] = { {} }; +static char *qcom_battmgr_battery[] = { "battery" }; + static int qcom_battmgr_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { + struct power_supply_config psy_cfg_supply = {}; + struct power_supply_config psy_cfg = {}; const struct of_device_id *match; struct qcom_battmgr *battmgr; struct device *dev = &adev->dev; @@ -1383,7 +1328,14 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, return -ENOMEM; battmgr->dev = dev; - battmgr->adev = adev; + + psy_cfg.drv_data = battmgr; + psy_cfg.of_node = adev->dev.of_node; + + psy_cfg_supply.drv_data = battmgr; + psy_cfg_supply.of_node = adev->dev.of_node; + psy_cfg_supply.supplied_to = qcom_battmgr_battery; + psy_cfg_supply.num_supplicants = 1; INIT_WORK(&battmgr->enable_work, qcom_battmgr_enable_worker); mutex_init(&battmgr->lock); @@ -1395,6 +1347,43 @@ static int qcom_battmgr_probe(struct auxiliary_device *adev, else battmgr->variant = QCOM_BATTMGR_SM8350; + if (battmgr->variant == QCOM_BATTMGR_SC8280XP) { + battmgr->bat_psy = devm_power_supply_register(dev, &sc8280xp_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), + "failed to register battery power supply\n"); + + battmgr->ac_psy = devm_power_supply_register(dev, &sc8280xp_ac_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->ac_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->ac_psy), + "failed to register AC power supply\n"); + + battmgr->usb_psy = devm_power_supply_register(dev, &sc8280xp_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), + "failed to register USB power supply\n"); + + battmgr->wls_psy = devm_power_supply_register(dev, &sc8280xp_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), + "failed to register wireless charing power supply\n"); + } else { + battmgr->bat_psy = devm_power_supply_register(dev, &sm8350_bat_psy_desc, &psy_cfg); + if (IS_ERR(battmgr->bat_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->bat_psy), + "failed to register battery power supply\n"); + + battmgr->usb_psy = devm_power_supply_register(dev, &sm8350_usb_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->usb_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->usb_psy), + "failed to register USB power supply\n"); + + battmgr->wls_psy = devm_power_supply_register(dev, &sm8350_wls_psy_desc, &psy_cfg_supply); + if (IS_ERR(battmgr->wls_psy)) + return dev_err_probe(dev, PTR_ERR(battmgr->wls_psy), + "failed to register wireless charing power supply\n"); + } + battmgr->client = devm_pmic_glink_register_client(dev, PMIC_GLINK_OWNER_BATTMGR, qcom_battmgr_callback, -- GitLab From c3dfcdb65ec1a4813ec1e0871c52c671ba9c71ac Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 25 Jan 2024 20:39:16 +0800 Subject: [PATCH 0219/1405] net/smc: fix incorrect SMC-D link group matching logic The logic to determine if SMC-D link group matches is incorrect. The correct logic should be that it only returns true when the GID is the same, and the SMC-D device is the same and the extended GID is the same (in the case of virtual ISM). It can be fixed by adding brackets around the conditional (or ternary) operator expression. But for better readability and maintainability, it has been changed to an if-else statement. Reported-by: Matthew Rosato Closes: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Fixes: b40584d14570 ("net/smc: compatible with 128-bits extended GID of virtual ISM device") Link: https://lore.kernel.org/r/13579588-eb9d-4626-a063-c0b77ed80f11@linux.ibm.com Signed-off-by: Wen Gu Reviewed-by: Alexandra Winter Link: https://lore.kernel.org/r/20240125123916.77928-1-guwen@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 95cc95458e2d8..e4c858411207a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, struct smcd_gid *peer_gid) { - return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && - smc_ism_is_virtual(smcismdev) ? - (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ -- GitLab From 281cb9d65a95c00bb844f332cd187491d2d55496 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 25 Jan 2024 05:41:03 -0800 Subject: [PATCH 0220/1405] bnxt_en: Make PTP timestamp HWRM more silent commit 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") changed a netdev_err() to netdev_WARN_ONCE(). netdev_WARN_ONCE() is it generates a kernel WARNING, which is bad, for the following reasons: * You do not a kernel warning if the firmware queries are late * In busy networks, timestamp query failures fairly regularly * A WARNING message doesn't bring much value, since the code path is clear. (This was discussed in-depth in [1]) Transform the netdev_WARN_ONCE() into a netdev_warn_once(), and print a more well-behaved message, instead of a full WARN(). bnxt_en 0000:67:00.0 eth0: TS query for TX timer failed rc = fffffff5 [1] Link: https://lore.kernel.org/all/ZbDj%2FFI4EJezcfd1@gmail.com/ Signed-off-by: Breno Leitao Reviewed-by: Pavan Chebbi Reviewed-by: Michael Chan Fixes: 056bce63c469 ("bnxt_en: Make PTP TX timestamp HWRM query silent") Link: https://lore.kernel.org/r/20240125134104.2045573-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index adad188e38b82..cc07660330f53 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, ×tamp); } else { - netdev_WARN_ONCE(bp->dev, + netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } -- GitLab From d3cb3b0088ca92082e2bebc40cc6894a632173e2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 09:22:50 +0100 Subject: [PATCH 0221/1405] selftests: net: add missing required classifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the udpgro_fraglist self-test uses the BPF classifiers, but the current net self-test configuration does not include it, causing CI failures: # selftests: net: udpgro_frglist.sh # ipv6 # tcp - over veth touching data # -l 4 -6 -D 2001:db8::1 -t rx -4 -t # Error: TC classifier not found. # We have an error talking to the kernel # Error: TC classifier not found. # We have an error talking to the kernel Add the missing knob. Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests") Signed-off-by: Paolo Abeni Reviewed-by: Maciej Żenczykowski Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/7c3643763b331e9a400e1874fe089193c99a1c3f.1706170897.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 413ab9abcf1bf..56da5d52674cf 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -51,6 +51,7 @@ CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m -- GitLab From 89abe628375301fedb68770644df845d49018d8b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Jan 2024 19:09:06 +0100 Subject: [PATCH 0222/1405] selftests: net: give more time for GRO aggregation The gro.sh test-case relay on the gro_flush_timeout to ensure that all the segments belonging to any given batch are properly aggregated. The other end, the sender is a user-space program transmitting each packet with a separate write syscall. A busy host and/or stracing the sender program can make the relevant segments reach the GRO engine after the flush timeout triggers. Give the GRO flush timeout more slack, to avoid sporadic self-tests failures. Fixes: 9af771d2ec04 ("selftests/net: allow GRO coalesce test on veth") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Tested-by: Eric Dumazet Link: https://lore.kernel.org/r/bffec2beab3a5672dd13ecabe4fad81d2155b367.1706206101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/setup_veth.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index a9a1759e035ca..1f78a87f6f37e 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up -- GitLab From 8d975c15c0cd744000ca386247432d57b21f9df0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 17:05:57 +0000 Subject: [PATCH 0223/1405] ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv() syzbot found __ip6_tnl_rcv() could access unitiliazed data [1]. Call pskb_inet_may_pull() to fix this, and initialize ipv6h variable after this call as it can change skb->head. [1] BUG: KMSAN: uninit-value in __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] BUG: KMSAN: uninit-value in INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] BUG: KMSAN: uninit-value in IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 __INET_ECN_decapsulate include/net/inet_ecn.h:253 [inline] INET_ECN_decapsulate include/net/inet_ecn.h:275 [inline] IP6_ECN_decapsulate+0x7df/0x1e50 include/net/inet_ecn.h:321 ip6ip6_dscp_ecn_decapsulate+0x178/0x1b0 net/ipv6/ip6_tunnel.c:727 __ip6_tnl_rcv+0xd4e/0x1590 net/ipv6/ip6_tunnel.c:845 ip6_tnl_rcv+0xce/0x100 net/ipv6/ip6_tunnel.c:888 gre_rcv+0x143f/0x1870 ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438 ip6_input_finish net/ipv6/ip6_input.c:483 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492 ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586 dst_input include/net/dst.h:461 [inline] ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79 NF_HOOK include/linux/netfilter.h:314 [inline] ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310 __netif_receive_skb_one_core net/core/dev.c:5532 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5646 netif_receive_skb_internal net/core/dev.c:5732 [inline] netif_receive_skb+0x58/0x660 net/core/dev.c:5791 tun_rx_batched+0x3ee/0x980 drivers/net/tun.c:1555 tun_get_user+0x53af/0x66d0 drivers/net/tun.c:2002 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] kmem_cache_alloc_node+0x5e9/0xb10 mm/slub.c:3523 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x318/0x740 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1286 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6334 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2787 tun_alloc_skb drivers/net/tun.c:1531 [inline] tun_get_user+0x1e8a/0x66d0 drivers/net/tun.c:1846 tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048 call_write_iter include/linux/fs.h:2084 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0x786/0x1200 fs/read_write.c:590 ksys_write+0x20f/0x4c0 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0x93/0xd0 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x6d/0x140 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b CPU: 0 PID: 5034 Comm: syz-executor331 Not tainted 6.7.0-syzkaller-00562-g9f8413c4a66f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Fixes: 0d3c703a9d17 ("ipv6: Cleanup IPv6 tunnel receive path") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240125170557.2663942-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd489901..9bbabf750a21e 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); -- GitLab From dfa988b4c7c3a48bde7c2713308920c7741fff29 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Wed, 24 Jan 2024 05:17:25 +0000 Subject: [PATCH 0224/1405] net: dsa: mt7530: fix 10M/100M speed on MT7988 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setup PMCR port register for actual speed and duplex on internally connected PHYs of the MT7988 built-in switch. This fixes links with speeds other than 1000M. Fixes: 110c18bfed41 ("net: dsa: mt7530: introduce driver for MT7988 built-in switch") Signed-off-by: Daniel Golle Reviewed-by: Vladimir Oltean Acked-by: Arınç ÜNAL Link: https://lore.kernel.org/r/a5b04dfa8256d8302f402545a51ac4c626fdba25.1706071272.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 391c4dbdff428..3c1f657593a8f 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2838,8 +2838,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, /* MT753x MAC works in 1G full duplex mode for all up-clocked * variants. */ - if (interface == PHY_INTERFACE_MODE_INTERNAL || - interface == PHY_INTERFACE_MODE_TRGMII || + if (interface == PHY_INTERFACE_MODE_TRGMII || (phy_interface_mode_is_8023z(interface))) { speed = SPEED_1000; duplex = DUPLEX_FULL; -- GitLab From 99b817c173cd213671daecd25ca27f56b0c7c4ec Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 11:44:03 +0100 Subject: [PATCH 0225/1405] lsm: fix the logic in security_inode_getsecctx() The inode_getsecctx LSM hook has previously been corrected to have -EOPNOTSUPP instead of 0 as the default return value to fix BPF LSM behavior. However, the call_int_hook()-generated loop in security_inode_getsecctx() was left treating 0 as the neutral value, so after an LSM returns 0, the loop continues to try other LSMs, and if one of them returns a non-zero value, the function immediately returns with said value. So in a situation where SELinux and the BPF LSMs registered this hook, -EOPNOTSUPP would be incorrectly returned whenever SELinux returned 0. Fix this by open-coding the call_int_hook() loop and making it use the correct LSM_RET_DEFAULT() value as the neutral one, similar to what other hooks do. Cc: stable@vger.kernel.org Reported-by: Stephen Smalley Link: https://lore.kernel.org/selinux/CAEjxPJ4ev-pasUwGx48fDhnmjBnq_Wh90jYPwRQRAqXxmOKD4Q@mail.gmail.com/ Link: https://bugzilla.redhat.com/show_bug.cgi?id=2257983 Fixes: b36995b8609a ("lsm: fix default return value for inode_getsecctx") Signed-off-by: Ondrej Mosnacek Reviewed-by: Casey Schaufler [PM: subject line tweak] Signed-off-by: Paul Moore --- security/security.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/security/security.c b/security/security.c index 0144a98d3712e..6196ccaba433a 100644 --- a/security/security.c +++ b/security/security.c @@ -4255,7 +4255,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); */ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); -- GitLab From d9281660ff3ffb4a05302b485cc59a87e709aefc Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Fri, 26 Jan 2024 22:01:42 +0800 Subject: [PATCH 0226/1405] erofs: relaxed temporary buffers allocation on readahead Even with inplace decompression, sometimes very few temporary buffers may be still needed for a single decompression shot (e.g. 16 pages for 64k sliding window or 4 pages for 16k sliding window). In low-memory scenarios, it would be better to try to allocate with GFP_NOWAIT on readahead first. That can help reduce the time spent on page allocation under durative memory pressure. Here are detailed performance numbers under multi-app launch benchmark workload [1] on ARM64 Android devices (8-core CPU and 8GB of memory) running a 5.15 LTS kernel with EROFS of 4k pclusters: +----------------------------------------------+ | LZ4 | vanilla | patched | diff | |----------------+---------+---------+---------| | Average (ms) | 3364 | 2684 | -20.21% | [64k sliding window] |----------------+---------+---------+---------| | Average (ms) | 2079 | 1610 | -22.56% | [16k sliding window] +----------------------------------------------+ The total size of system images for 4k pclusters is almost unchanged: (64k sliding window) 9,117,044 KB (16k sliding window) 9,113,096 KB Therefore, in addition to switch the sliding window from 64k to 16k, after applying this patch, it can eventually save 52.14% (3364 -> 1610) on average with no memory reservation. That is particularly useful for embedded devices with limited resources. [1] https://lore.kernel.org/r/20240109074143.4138783-1-guochunhai@vivo.com Suggested-by: Gao Xiang Signed-off-by: Chunhai Guo Signed-off-by: Gao Xiang Reviewed-by: Yue Hu Link: https://lore.kernel.org/r/20240126140142.201718-1-hsiangkao@linux.alibaba.com --- fs/erofs/compress.h | 5 ++--- fs/erofs/decompressor.c | 5 +++-- fs/erofs/decompressor_deflate.c | 19 +++++++++++++------ fs/erofs/decompressor_lzma.c | 17 ++++++++++++----- fs/erofs/zdata.c | 16 ++++++++++++---- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 279933e007d21..7cc5841577b24 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -11,13 +11,12 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; - unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; - /* indicate the algorithm will be used for decompression */ - unsigned int alg; + unsigned int alg; /* the algorithm for decompression */ bool inplace_io, partial_decoding, fillgaps; + gfp_t gfp; /* allocation flags for extra temporary buffers */ }; struct z_erofs_decompressor { diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 072ef6a66823e..d4cee95af14c7 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, victim = availables[--top]; get_page(victim); } else { - victim = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + victim = erofs_allocpage(pagepool, rq->gfp); + if (!victim) + return -ENOMEM; set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); } rq->out[i] = victim; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 4a64a9c91dd32..b98872058abe8 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb, } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -158,8 +158,12 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs); outsz -= strm->z.avail_out; if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + kout = NULL; + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -211,8 +215,11 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -230,7 +237,7 @@ int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, break; } } - +failed: if (zlib_inflateEnd(&strm->z) != Z_OK && !err) err = -EIO; if (kout) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 2dd14f99c1dc1..6ca357d83cfa4 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -148,7 +148,7 @@ int z_erofs_load_lzma_config(struct super_block *sb, } int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -215,8 +215,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; if (!rq->out[no] && rq->fillgaps) { /* deduped */ - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -258,8 +261,11 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -277,6 +283,7 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, break; } } +failed: if (no < nrpages_out && strm->buf.out) kunmap(rq->out[no]); if (ni < nrpages_in) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index c1c77166b30ff..ff0aa72b0db34 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -82,6 +82,9 @@ struct z_erofs_pcluster { /* L: indicate several pageofs_outs or not */ bool multibases; + /* L: whether extra buffer allocations are best-effort */ + bool besteffort; + /* A: compressed bvecs (can be cached or inplaced pages) */ struct z_erofs_bvec compressed_bvecs[]; }; @@ -960,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page) + struct page *page, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; @@ -1010,6 +1013,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, err = z_erofs_pcluster_begin(fe); if (err) goto out; + fe->pcl->besteffort |= !ra; } /* @@ -1276,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, .inplace_io = overlapped, .partial_decoding = pcl->partial, .fillgaps = pcl->multibases, + .gfp = pcl->besteffort ? + GFP_KERNEL | __GFP_NOFAIL : + GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); /* must handle all compressed pages before actual file pages */ @@ -1318,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, pcl->length = 0; pcl->partial = true; pcl->multibases = false; + pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; @@ -1787,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page); + (void)z_erofs_do_read_page(f, page, !!rac); put_page(page); } @@ -1808,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1849,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); -- GitLab From e622502c310f1069fd9f41cd38210553115f610a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Jan 2024 15:18:47 +0100 Subject: [PATCH 0227/1405] ipmr: fix kernel panic when forwarding mcast packets The stacktrace was: [ 86.305548] BUG: kernel NULL pointer dereference, address: 0000000000000092 [ 86.306815] #PF: supervisor read access in kernel mode [ 86.307717] #PF: error_code(0x0000) - not-present page [ 86.308624] PGD 0 P4D 0 [ 86.309091] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 86.309883] CPU: 2 PID: 3139 Comm: pimd Tainted: G U 6.8.0-6wind-knet #1 [ 86.311027] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014 [ 86.312728] RIP: 0010:ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.313399] Code: f9 1f 0f 87 85 03 00 00 48 8d 04 5b 48 8d 04 83 49 8d 44 c5 00 48 8b 40 70 48 39 c2 0f 84 d9 00 00 00 49 8b 46 58 48 83 e0 fe <80> b8 92 00 00 00 00 0f 84 55 ff ff ff 49 83 47 38 01 45 85 e4 0f [ 86.316565] RSP: 0018:ffffad21c0583ae0 EFLAGS: 00010246 [ 86.317497] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 86.318596] RDX: ffff9559cb46c000 RSI: 0000000000000000 RDI: 0000000000000000 [ 86.319627] RBP: ffffad21c0583b30 R08: 0000000000000000 R09: 0000000000000000 [ 86.320650] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 [ 86.321672] R13: ffff9559c093a000 R14: ffff9559cc00b800 R15: ffff9559c09c1d80 [ 86.322873] FS: 00007f85db661980(0000) GS:ffff955a79d00000(0000) knlGS:0000000000000000 [ 86.324291] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 86.325314] CR2: 0000000000000092 CR3: 000000002f13a000 CR4: 0000000000350ef0 [ 86.326589] Call Trace: [ 86.327036] [ 86.327434] ? show_regs (/build/work/knet/arch/x86/kernel/dumpstack.c:479) [ 86.328049] ? __die (/build/work/knet/arch/x86/kernel/dumpstack.c:421 /build/work/knet/arch/x86/kernel/dumpstack.c:434) [ 86.328508] ? page_fault_oops (/build/work/knet/arch/x86/mm/fault.c:707) [ 86.329107] ? do_user_addr_fault (/build/work/knet/arch/x86/mm/fault.c:1264) [ 86.329756] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.330350] ? __irq_work_queue_local (/build/work/knet/kernel/irq_work.c:111 (discriminator 1)) [ 86.331013] ? exc_page_fault (/build/work/knet/./arch/x86/include/asm/paravirt.h:693 /build/work/knet/arch/x86/mm/fault.c:1515 /build/work/knet/arch/x86/mm/fault.c:1563) [ 86.331702] ? asm_exc_page_fault (/build/work/knet/./arch/x86/include/asm/idtentry.h:570) [ 86.332468] ? ip_mr_forward (/build/work/knet/net/ipv4/ipmr.c:1985) [ 86.333183] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.333920] ipmr_mfc_add (/build/work/knet/./include/linux/rcupdate.h:782 /build/work/knet/net/ipv4/ipmr.c:1009 /build/work/knet/net/ipv4/ipmr.c:1273) [ 86.334583] ? __pfx_ipmr_hash_cmp (/build/work/knet/net/ipv4/ipmr.c:363) [ 86.335357] ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.336135] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.336854] ? ip_mroute_setsockopt (/build/work/knet/net/ipv4/ipmr.c:1470) [ 86.337679] do_ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:944) [ 86.338408] ? __pfx_unix_stream_read_actor (/build/work/knet/net/unix/af_unix.c:2862) [ 86.339232] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.339809] ? aa_sk_perm (/build/work/knet/security/apparmor/include/cred.h:153 /build/work/knet/security/apparmor/net.c:181) [ 86.340342] ip_setsockopt (/build/work/knet/net/ipv4/ip_sockglue.c:1415) [ 86.340859] raw_setsockopt (/build/work/knet/net/ipv4/raw.c:836) [ 86.341408] ? security_socket_setsockopt (/build/work/knet/security/security.c:4561 (discriminator 13)) [ 86.342116] sock_common_setsockopt (/build/work/knet/net/core/sock.c:3716) [ 86.342747] do_sock_setsockopt (/build/work/knet/net/socket.c:2313) [ 86.343363] __sys_setsockopt (/build/work/knet/./include/linux/file.h:32 /build/work/knet/net/socket.c:2336) [ 86.344020] __x64_sys_setsockopt (/build/work/knet/net/socket.c:2340) [ 86.344766] do_syscall_64 (/build/work/knet/arch/x86/entry/common.c:52 /build/work/knet/arch/x86/entry/common.c:83) [ 86.345433] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.346161] ? syscall_exit_work (/build/work/knet/./include/linux/audit.h:357 /build/work/knet/kernel/entry/common.c:160) [ 86.346938] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.347657] ? syscall_exit_to_user_mode (/build/work/knet/kernel/entry/common.c:215) [ 86.348538] ? srso_return_thunk (/build/work/knet/arch/x86/lib/retpoline.S:223) [ 86.349262] ? do_syscall_64 (/build/work/knet/./arch/x86/include/asm/cpufeature.h:171 /build/work/knet/arch/x86/entry/common.c:98) [ 86.349971] entry_SYSCALL_64_after_hwframe (/build/work/knet/arch/x86/entry/entry_64.S:129) The original packet in ipmr_cache_report() may be queued and then forwarded with ip_mr_forward(). This last function has the assumption that the skb dst is set. After the below commit, the skb dst is dropped by ipv4_pktinfo_prepare(), which causes the oops. Fixes: bb7403655b3c ("ipmr: support IP_PKTINFO on cache report IGMP msg") Signed-off-by: Nicolas Dichtel Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240125141847.1931933-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 6 ++++-- net/ipv4/ipmr.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index de0c69c57e3cb..25cb688bdc623 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7aa9dc0e6760d..21d2ffa919e98 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1363,12 +1363,13 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) || @@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d6f59531b3a0..3622298365105 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 27da9d7294c0b..aea89326c6979 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 148ffb007969f..f631b0a21af4c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: -- GitLab From 59f7ea703c38abc3f239068d49cc8897740e4c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 9 Jan 2024 22:58:59 +0100 Subject: [PATCH 0228/1405] batman-adv: mcast: fix mcast packet type counter on timeouted nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a node which does not have the new batman-adv multicast packet type capability vanishes then the according, global counter erroneously would not be reduced in response on other nodes. Which in turn leads to the mesh never switching back to sending with the new multicast packet type. Fix this by reducing the according counter when such a node times out. Fixes: 90039133221e ("batman-adv: mcast: implement multicast packet generation") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d982daea83292..b4f8b4af17226 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2198,6 +2198,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA); spin_unlock_bh(&orig->mcast_handler_lock); } -- GitLab From 0a186b49bba596b81de5a686ce5bfc9cd48ab3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 17 Jan 2024 06:22:20 +0100 Subject: [PATCH 0229/1405] batman-adv: mcast: fix memory leak on deleting a batman-adv interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batman-adv multicast tracker TVLV handler is registered for the new batman-adv multicast packet type upon creating a batman-adv interface, but not unregistered again upon the interface's deletion, leading to a memory leak. Fix this memory leak by calling the according TVLV handler unregister routine for the multicast tracker TVLV upon batman-adv interface deletion. Fixes: 07afe1ba288c ("batman-adv: mcast: implement multicast packet reception and forwarding") Reported-and-tested-by: syzbot+ebe64cc5950868e77358@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000beadc4060f0cbc23@google.com/ Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index b4f8b4af17226..14088c4ff2f66 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv) cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ -- GitLab From 586e40aa883cab255ab8ddfe332c4092a5349cb7 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 24 Jan 2024 09:16:22 +0000 Subject: [PATCH 0230/1405] MAINTAINERS: Add connector headers to NETWORKING DRIVERS Commit 46cf789b68b2 ("connector: Move maintainence under networking drivers umbrella.") moved the connector maintenance but did not include the connector header files. It seems that it has always been implied that these headers were maintained along with the rest of the connector code, both before and after the cited commit. Make this explicit. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 92152ac346c8d..9435ea0f8cda0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15179,6 +15179,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ +F: include/linux/cn_proc.h F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h @@ -15186,6 +15187,7 @@ F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdevice.h +F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h X: drivers/net/wireless/ -- GitLab From 62b4248105353e7d1debd30ca5c57ec5e5f28e35 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 24 Jan 2024 11:17:58 +0100 Subject: [PATCH 0231/1405] net: lan966x: Fix port configuration when using SGMII interface In case the interface between the MAC and the PHY is SGMII, then the bit GIGA_MODE on the MAC side needs to be set regardless of the speed at which it is running. Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Reviewed-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 92108d354051c..2e83bbb9477e0 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, -- GitLab From a69eeaad093dd2c8fd0b216c8143b380b73d672d Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Fri, 26 Jan 2024 14:52:26 +0100 Subject: [PATCH 0232/1405] iio: humidity: hdc3020: fix temperature offset The temperature offset should be negative according to the datasheet. Adding a minus to the existing offset results in correct temperature calculations. Fixes: c9180b8e39be ("iio: humidity: Add driver for ti HDC302x humidity sensors") Reviewed-by: Nuno Sa Signed-off-by: Dimitri Fedrau Link: https://lore.kernel.org/r/20240126135226.3977904-1-dima.fedrau@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc3020.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c index 4e3311170725b..ed70415512f68 100644 --- a/drivers/iio/humidity/hdc3020.c +++ b/drivers/iio/humidity/hdc3020.c @@ -322,7 +322,7 @@ static int hdc3020_read_raw(struct iio_dev *indio_dev, if (chan->type != IIO_TEMP) return -EINVAL; - *val = 16852; + *val = -16852; return IIO_VAL_INT; default: -- GitLab From 6db053cd949fcd6254cea9f2cd5d39f7bd64379c Mon Sep 17 00:00:00 2001 From: David Schiller Date: Mon, 22 Jan 2024 14:49:17 +0100 Subject: [PATCH 0233/1405] staging: iio: ad5933: fix type mismatch regression Commit 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") fixed a compiler warning, but introduced a bug that resulted in one of the two 16 bit IIO channels always being zero (when both are enabled). This is because int is 32 bits wide on most architectures and in the case of a little-endian machine the two most significant bytes would occupy the buffer for the second channel as 'val' is being passed as a void pointer to 'iio_push_to_buffers()'. Fix by defining 'val' as u16. Tested working on ARM64. Fixes: 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") Signed-off-by: David Schiller Link: https://lore.kernel.org/r/20240122134916.2137957-1-david.schiller@jku.at Cc: Signed-off-by: Jonathan Cameron --- drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index e748a5d04e970..9149d41fe65b7 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; -- GitLab From 915644189c22d9c93e9fee7c7c993b58e745bef7 Mon Sep 17 00:00:00 2001 From: Konstantin Aladyshev Date: Sat, 27 Jan 2024 18:48:44 +0300 Subject: [PATCH 0234/1405] hwmon: (pmbus/mp2975) Correct comment inside 'mp2975_read_byte_data' The current driver code no longer perfrom internal conversion from VID to direct. Instead it configures READ_VOUT using MFR_DC_LOOP_CTRL. Correct the comment message inside the 'mp2975_read_byte_data' function to match the driver logic. Signed-off-by: Konstantin Aladyshev Fixes: c60fe56c169e ("hwmon: (pmbus/mp2975) Fix driver initialization for MP2975 device") Link: https://lore.kernel.org/r/20240127154844.989-1-aladyshev22@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/mp2975.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index 5bbfdacb61a76..e5fa10b3b8bc7 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -131,10 +131,9 @@ static int mp2975_read_byte_data(struct i2c_client *client, int page, int reg) switch (reg) { case PMBUS_VOUT_MODE: /* - * Enforce VOUT direct format, since device allows to set the - * different formats for the different rails. Conversion from - * VID to direct provided by driver internally, in case it is - * necessary. + * Report direct format as configured by MFR_DC_LOOP_CTRL. + * Unlike on MP2971/MP2973 the reported VOUT_MODE isn't automatically + * internally updated, but always reads as PB_VOUT_MODE_VID. */ return PB_VOUT_MODE_DIRECT; default: -- GitLab From 6c20faec8ffc97af21acb43382adda011eb39359 Mon Sep 17 00:00:00 2001 From: Zhang Bingwu Date: Sun, 14 Jan 2024 16:13:59 +0800 Subject: [PATCH 0235/1405] kbuild: defconf: use SRCARCH to find merged configs For some ARCH values, SRCARCH, which should be used for finding arch/ subdirectory, is different from ARCH. Signed-off-by: Zhang Bingwu Signed-off-by: Masahiro Yamada --- scripts/Makefile.defconf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index ab271b2051a24..226ea3df3b4b4 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -9,8 +9,8 @@ # Input config fragments without '.config' suffix define merge_into_defconfig $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -23,7 +23,7 @@ endef # Input config fragments without '.config' suffix define merge_into_defconfig_override $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef -- GitLab From 846cfbeed09b45d985079a9173cf390cc053715b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:54 -0700 Subject: [PATCH 0236/1405] um: Fix adding '-no-pie' for clang The kernel builds with -fno-PIE, so commit 883354afbc10 ("um: link vmlinux with -no-pie") added the compiler linker flag '-no-pie' via cc-option because '-no-pie' was only supported in GCC 6.1.0 and newer. While this works for GCC, this does not work for clang because cc-option uses '-c', which stops the pipeline right before linking, so '-no-pie' is unconsumed and clang warns, causing cc-option to fail just as it would if the option was entirely unsupported: $ clang -Werror -no-pie -c -o /dev/null -x c /dev/null clang-16: error: argument unused during compilation: '-no-pie' [-Werror,-Wunused-command-line-argument] A recent version of clang exposes this because it generates a relocation under '-mcmodel=large' that is not supported in PIE mode: /usr/sbin/ld: init/main.o: relocation R_X86_64_32 against symbol `saved_command_line' can not be used when making a PIE object; recompile with -fPIE /usr/sbin/ld: failed to set dynamic section sizes: bad value clang: error: linker command failed with exit code 1 (use -v to see invocation) Remove the cc-option check altogether. It is wasteful to invoke the compiler to check for '-no-pie' because only one supported compiler version does not support it, GCC 5.x (as it is supported with the minimum version of clang and GCC 6.1.0+). Use a combination of the gcc-min-version macro and CONFIG_CC_IS_CLANG to unconditionally add '-no-pie' with CONFIG_LD_SCRIPT_DYN=y, so that it is enabled with all compilers that support this. Furthermore, using gcc-min-version can help turn this back into LINK-$(CONFIG_LD_SCRIPT_DYN) += -no-pie when the minimum version of GCC is bumped past 6.1.0. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1982 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- arch/um/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 82f05f2506348..34957dcb88b9c 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -115,7 +115,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ -- GitLab From 397586506c3da005b9333ce5947ad01e8018a3be Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 23 Jan 2024 15:59:55 -0700 Subject: [PATCH 0237/1405] modpost: Add '.ltext' and '.ltext.*' to TEXT_SECTIONS After the linked LLVM change, building ARCH=um defconfig results in a segmentation fault in modpost. Prior to commit a23e7584ecf3 ("modpost: unify 'sym' and 'to' in default_mismatch_handler()"), there was a warning: WARNING: modpost: vmlinux.o(__ex_table+0x88): Section mismatch in reference to the .ltext:(unknown) WARNING: modpost: The relocation at __ex_table+0x88 references section ".ltext" which is not in the list of authorized sections. If you're adding a new section and/or if this reference is valid, add ".ltext" to the list of authorized sections to jump to on fault. This can be achieved by adding ".ltext" to OTHER_TEXT_SECTIONS in scripts/mod/modpost.c. The linked LLVM change moves global objects to the '.ltext' (and '.ltext.*' with '-ffunction-sections') sections with '-mcmodel=large', which ARCH=um uses. These sections should be handled just as '.text' and '.text.*' are, so add them to TEXT_SECTIONS. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/1981 Link: https://github.com/llvm/llvm-project/commit/4bf8a688956a759b7b6b8d94f42d25c13c7af130 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 795b21154446d..acf72112acd8b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -806,7 +806,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" -- GitLab From e30dca91e5667568a6be54886020c43f1f6f95d3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 27 Jan 2024 14:52:23 -0300 Subject: [PATCH 0238/1405] tools headers UAPI: Sync kvm headers with the kernel sources To pick the changes in: a5d3df8ae13fada7 ("KVM: remove deprecated UAPIs") 6d72283526090850 ("KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT") 89ea60c2c7b5838b ("KVM: x86: Add support for "protected VMs" that can utilize private memory") 8dd2eee9d526c30f ("KVM: x86/mmu: Handle page fault for private memory") a7800aa80ea4d535 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") 5a475554db1e476a ("KVM: Introduce per-page memory attributes") 16f95f3b95caded2 ("KVM: Add KVM_EXIT_MEMORY_FAULT exit to report faults to userspace") bb58b90b1a8f753b ("KVM: Introduce KVM_SET_USER_MEMORY_REGION2") 3f9cd0ca848413fd ("KVM: arm64: Allow userspace to get the writable masks for feature ID registers") That automatically adds support for some new ioctls and remove a bunch of deprecated ones. This ends up making the new binary to forget about the deprecated one, so when used in an older system it will not be able to resolve those codes to strings. $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2024-01-27 14:48:16.523014020 -0300 +++ after 2024-01-27 14:48:24.183932866 -0300 @@ -14,6 +14,7 @@ [0x46] = "SET_USER_MEMORY_REGION", [0x47] = "SET_TSS_ADDR", [0x48] = "SET_IDENTITY_MAP_ADDR", + [0x49] = "SET_USER_MEMORY_REGION2", [0x60] = "CREATE_IRQCHIP", [0x61] = "IRQ_LINE", [0x62] = "GET_IRQCHIP", @@ -22,14 +23,8 @@ [0x65] = "GET_PIT", [0x66] = "SET_PIT", [0x67] = "IRQ_LINE_STATUS", - [0x69] = "ASSIGN_PCI_DEVICE", [0x6a] = "SET_GSI_ROUTING", - [0x70] = "ASSIGN_DEV_IRQ", [0x71] = "REINJECT_CONTROL", - [0x72] = "DEASSIGN_PCI_DEVICE", - [0x73] = "ASSIGN_SET_MSIX_NR", - [0x74] = "ASSIGN_SET_MSIX_ENTRY", - [0x75] = "DEASSIGN_DEV_IRQ", [0x76] = "IRQFD", [0x77] = "CREATE_PIT2", [0x78] = "SET_BOOT_CPU_ID", @@ -66,7 +61,6 @@ [0x9f] = "GET_VCPU_EVENTS", [0xa0] = "SET_VCPU_EVENTS", [0xa3] = "ENABLE_CAP", - [0xa4] = "ASSIGN_SET_INTX_MASK", [0xa5] = "SIGNAL_MSI", [0xa6] = "GET_XCRS", [0xa7] = "SET_XCRS", @@ -97,6 +91,8 @@ [0xcd] = "SET_SREGS2", [0xce] = "GET_STATS_FD", [0xd0] = "XEN_HVM_EVTCHN_SEND", + [0xd2] = "SET_MEMORY_ATTRIBUTES", + [0xd4] = "CREATE_GUEST_MEMFD", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ This silences these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Chao Peng Cc: Ian Rogers Cc: Jing Zhang Cc: Jiri Olsa Cc: Namhyung Kim Cc: Oliver Upton Cc: Paolo Bonzini Cc: Paul Durrant Cc: Sean Christopherson Link: https://lore.kernel.org/lkml/ZbVLbkngp4oq13qN@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 3 + tools/include/uapi/linux/kvm.h | 140 +++++++++----------------- 2 files changed, 53 insertions(+), 90 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 1a6a1f9879496..a448d0964fc06 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -562,4 +562,7 @@ struct kvm_pmu_event_filter { /* x86-specific KVM_EXIT_HYPERCALL flags. */ #define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 211b86de35ac5..c3308536482bd 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -16,76 +16,6 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -95,6 +25,19 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -102,6 +45,7 @@ struct kvm_userspace_memory_region { */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -265,6 +209,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -518,6 +463,13 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. */ char padding[256]; }; @@ -945,9 +897,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -1201,6 +1150,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 +#define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 #ifdef KVM_CAP_IRQ_ROUTING @@ -1291,6 +1245,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) struct kvm_xen_hvm_config { __u32 flags; @@ -1483,6 +1438,8 @@ struct kvm_vfio_spapr_tce { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { @@ -1507,20 +1464,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1537,9 +1482,6 @@ struct kvm_s390_ucas_mapping { * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1592,8 +1534,6 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -2267,4 +2207,24 @@ struct kvm_s390_zpci_op { /* flags for kvm_s390_zpci_op->u.reg_aen.flags */ #define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + +#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + +struct kvm_create_guest_memfd { + __u64 size; + __u64 flags; + __u64 reserved[6]; +}; + #endif /* __LINUX_KVM_H */ -- GitLab From becc24e96ad4b9bc5c5bba800dc184661b6702bc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 4 Jan 2024 15:19:03 -0800 Subject: [PATCH 0239/1405] perf vendor events intel: Alderlake/sapphirerapids metric fixes As events are deduplicated by name, ensure PMU prefixes are always used in metrics. Previously they may be missed on the first event in a formula. Update metric constraints for architectures with topdown l2 events. Conversion script updated in: https://github.com/intel/perfmon/pull/128 Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Edward Baker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Closes: https://lore.kernel.org/lkml/ZZam-EG-UepcXtWw@kernel.org/ Link: https://lore.kernel.org/r/20240104231903.775717-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/alderlake/adl-metrics.json | 254 ++++++++---------- .../arch/x86/alderlaken/adln-metrics.json | 4 - .../arch/x86/sapphirerapids/spr-metrics.json | 25 +- 3 files changed, 123 insertions(+), 160 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 35124a4ddcb2b..bbfa3883e5338 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -114,7 +114,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", - "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_alloc_restriction", "MetricThreshold": "tma_alloc_restriction > 0.1", @@ -124,7 +124,7 @@ { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", @@ -169,7 +169,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_detect", "MetricThreshold": "tma_branch_detect > 0.05", @@ -179,7 +179,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05", @@ -189,7 +189,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_resteer", "MetricThreshold": "tma_branch_resteer > 0.05", @@ -198,7 +198,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_cisc", "MetricThreshold": "tma_cisc > 0.05", @@ -217,7 +217,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", - "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_decode", "MetricThreshold": "tma_decode > 0.05", @@ -235,7 +235,6 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -245,7 +244,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", "MetricName": "tma_fast_nuke", "MetricThreshold": "tma_fast_nuke > 0.05", @@ -254,7 +253,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1", @@ -264,7 +263,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.15", @@ -283,7 +282,7 @@ }, { "BriefDescription": "Counts the number of floating point divide operations per uop.", - "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots", + "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", "MetricName": "tma_fpdiv_uops", "MetricThreshold": "tma_fpdiv_uops > 0.2", @@ -293,7 +292,7 @@ { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.2", @@ -303,7 +302,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_icache_misses", "MetricThreshold": "tma_icache_misses > 0.05", @@ -330,7 +329,7 @@ }, { "BriefDescription": "Instructions Per Cycle", - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks", "MetricName": "tma_info_core_ipc", "Unit": "cpu_atom" }, @@ -342,7 +341,7 @@ }, { "BriefDescription": "Uops Per Instruction", - "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", + "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", "MetricName": "tma_info_core_upi", "Unit": "cpu_atom" }, @@ -366,13 +365,13 @@ }, { "BriefDescription": "Ratio of all branches which mispredict", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_branch_mispredict_ratio", "Unit": "cpu_atom" }, { "BriefDescription": "Ratio between Mispredicted branches and unknown branches", - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY", "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio", "Unit": "cpu_atom" }, @@ -390,61 +389,61 @@ }, { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_ipbranch", "Unit": "cpu_atom" }, { "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", "MetricName": "tma_info_inst_mix_ipcall", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Far Branch", - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", "MetricName": "tma_info_inst_mix_ipfarbranch", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Load", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", "MetricName": "tma_info_inst_mix_ipload", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", "MetricName": "tma_info_inst_mix_ipmisp_cond_taken", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", "MetricName": "tma_info_inst_mix_ipmisp_indirect", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired return Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", "MetricName": "tma_info_inst_mix_ipmisp_ret", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per retired Branch Misprediction", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", "MetricName": "tma_info_inst_mix_ipmispredict", "Unit": "cpu_atom" }, { "BriefDescription": "Instructions per Store", - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", "MetricName": "tma_info_inst_mix_ipstore", "Unit": "cpu_atom" }, @@ -480,19 +479,19 @@ }, { "BriefDescription": "Cycle cost per DRAM hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit", "Unit": "cpu_atom" }, { "BriefDescription": "Cycle cost per L2 hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit", "Unit": "cpu_atom" }, { "BriefDescription": "Cycle cost per LLC hit", - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT", "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit", "Unit": "cpu_atom" }, @@ -504,7 +503,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", "MetricName": "tma_info_system_cpu_utilization", "Unit": "cpu_atom" }, @@ -524,7 +523,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", - "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_itlb_misses", "MetricThreshold": "tma_itlb_misses > 0.05", @@ -533,7 +532,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", - "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l1_bound", "MetricThreshold": "tma_l1_bound > 0.1", @@ -542,7 +541,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -552,7 +550,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -571,7 +568,7 @@ }, { "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.05", @@ -581,7 +578,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_mem_scheduler", "MetricThreshold": "tma_mem_scheduler > 0.1", @@ -590,7 +587,7 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", - "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", + "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2", @@ -609,7 +606,7 @@ }, { "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", - "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots", + "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots", "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_ms_uops", "MetricThreshold": "tma_ms_uops > 0.05", @@ -620,7 +617,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", - "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_non_mem_scheduler", "MetricThreshold": "tma_non_mem_scheduler > 0.1", @@ -629,7 +626,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", - "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", "MetricName": "tma_nuke", "MetricThreshold": "tma_nuke > 0.05", @@ -638,7 +635,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", - "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_other_fb", "MetricThreshold": "tma_other_fb > 0.05", @@ -647,7 +644,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", - "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_other_l1", "MetricThreshold": "tma_other_l1 > 0.05", @@ -683,7 +680,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", - "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", "MetricName": "tma_predecode", "MetricThreshold": "tma_predecode > 0.05", @@ -692,7 +689,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_register", "MetricThreshold": "tma_register > 0.1", @@ -701,7 +698,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", - "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_reorder_buffer", "MetricThreshold": "tma_reorder_buffer > 0.1", @@ -722,7 +719,7 @@ { "BriefDescription": "Counts the number of issue slots that result in retirement slots.", "DefaultMetricgroupName": "TopdownL1", - "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots", "MetricGroup": "Default;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.75", @@ -741,7 +738,7 @@ }, { "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", - "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots", + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots", "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", "MetricName": "tma_serialization", "MetricThreshold": "tma_serialization > 0.1", @@ -768,7 +765,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", - "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_stlb_hit", "MetricThreshold": "tma_stlb_hit > 0.05", @@ -777,7 +774,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", - "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_stlb_miss", "MetricThreshold": "tma_stlb_miss > 0.05", @@ -795,8 +792,7 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", - "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", + "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", "MetricThreshold": "tma_store_fwd_blk > 0.05", @@ -875,7 +871,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers", - "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches", + "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches", "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_branch_resteers", "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -905,7 +901,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_contested_accesses", @@ -927,7 +922,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_data_sharing", @@ -948,7 +942,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active", - "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks", + "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks", "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", "MetricName": "tma_divider", "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", @@ -958,7 +952,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -979,7 +972,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks", + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks", "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", "MetricName": "tma_dsb_switches", "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1019,7 +1012,7 @@ }, { "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed", - "MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks", + "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks", "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group", "MetricName": "tma_fb_full", "MetricThreshold": "tma_fb_full > 0.3", @@ -1154,7 +1147,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses", - "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks", + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_icache_misses", "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1164,7 +1157,6 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bad_spec_branch_misprediction_cost", @@ -1173,7 +1165,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken", "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_ntaken < 200", @@ -1181,7 +1173,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_cond_taken", "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200", @@ -1197,7 +1189,7 @@ }, { "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET", "MetricGroup": "Bad;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmisp_ret", "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500", @@ -1205,7 +1197,7 @@ }, { "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BadSpec;BrMispredicts", "MetricName": "tma_info_bad_spec_ipmispredict", "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200", @@ -1213,7 +1205,6 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_botlnk_l0_core_bound_likely", @@ -1222,7 +1213,6 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_botlnk_l2_dsb_misses", @@ -1232,7 +1222,6 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -1242,7 +1231,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", "MetricName": "tma_info_bottleneck_big_code", @@ -1261,7 +1249,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_bottleneck_instruction_fetch_bw", @@ -1270,7 +1257,6 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -1280,7 +1266,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_bottleneck_memory_data_tlbs", @@ -1290,7 +1275,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_bottleneck_memory_latency", @@ -1300,7 +1284,6 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bottleneck_mispredictions", @@ -1317,14 +1300,14 @@ }, { "BriefDescription": "Fraction of branches that are non-taken conditionals", - "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;Branches;CodeGen;PGO", "MetricName": "tma_info_branches_cond_nt", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of branches that are taken conditionals", - "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;Branches;CodeGen;PGO", "MetricName": "tma_info_branches_cond_tk", "Unit": "cpu_core" @@ -1352,7 +1335,7 @@ }, { "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks", "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group", "MetricName": "tma_info_core_coreipc", "Unit": "cpu_core" @@ -1374,14 +1357,14 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", - "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", "MetricName": "tma_info_core_ilp", "Unit": "cpu_core" }, { "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@", + "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", "MetricName": "tma_info_frontend_dsb_coverage", "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35", @@ -1390,28 +1373,28 @@ }, { "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.", - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", "MetricGroup": "DSBmiss", "MetricName": "tma_info_frontend_dsb_switch_cost", "Unit": "cpu_core" }, { "BriefDescription": "Average number of Uops issued by front-end when it issued something", - "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", + "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", "MetricGroup": "Fed;FetchBW", "MetricName": "tma_info_frontend_fetch_upc", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L1 instruction cache misses", - "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", "MetricGroup": "Fed;FetchLat;IcMiss", "MetricName": "tma_info_frontend_icache_miss_latency", "Unit": "cpu_core" }, { "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS", "MetricGroup": "DSBmiss;Fed", "MetricName": "tma_info_frontend_ipdsb_miss_ret", "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50", @@ -1440,14 +1423,14 @@ }, { "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", - "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@", + "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@", "MetricGroup": "Fed;LSD", "MetricName": "tma_info_frontend_lsd_coverage", "Unit": "cpu_core" }, { "BriefDescription": "Branch instructions per taken branch.", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN", "MetricGroup": "Branches;Fed;PGO", "MetricName": "tma_info_inst_mix_bptkbranch", "Unit": "cpu_core" @@ -1462,7 +1445,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", "MetricGroup": "Flops;InsType", "MetricName": "tma_info_inst_mix_iparith", "MetricThreshold": "tma_info_inst_mix_iparith < 10", @@ -1471,7 +1454,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", "MetricGroup": "Flops;FpVector;InsType", "MetricName": "tma_info_inst_mix_iparith_avx128", "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10", @@ -1480,7 +1463,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", "MetricGroup": "Flops;FpVector;InsType", "MetricName": "tma_info_inst_mix_iparith_avx256", "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10", @@ -1489,7 +1472,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "MetricGroup": "Flops;FpScalar;InsType", "MetricName": "tma_info_inst_mix_iparith_scalar_dp", "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10", @@ -1498,7 +1481,7 @@ }, { "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "MetricGroup": "Flops;FpScalar;InsType", "MetricName": "tma_info_inst_mix_iparith_scalar_sp", "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10", @@ -1507,7 +1490,7 @@ }, { "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Fed;InsType", "MetricName": "tma_info_inst_mix_ipbranch", "MetricThreshold": "tma_info_inst_mix_ipbranch < 8", @@ -1515,7 +1498,7 @@ }, { "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches;Fed;PGO", "MetricName": "tma_info_inst_mix_ipcall", "MetricThreshold": "tma_info_inst_mix_ipcall < 200", @@ -1523,7 +1506,7 @@ }, { "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", "MetricGroup": "Flops;InsType", "MetricName": "tma_info_inst_mix_ipflop", "MetricThreshold": "tma_info_inst_mix_ipflop < 10", @@ -1531,7 +1514,7 @@ }, { "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "InsType", "MetricName": "tma_info_inst_mix_ipload", "MetricThreshold": "tma_info_inst_mix_ipload < 3", @@ -1539,7 +1522,7 @@ }, { "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "InsType", "MetricName": "tma_info_inst_mix_ipstore", "MetricThreshold": "tma_info_inst_mix_ipstore < 8", @@ -1547,7 +1530,7 @@ }, { "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", "MetricGroup": "Prefetches", "MetricName": "tma_info_inst_mix_ipswpf", "MetricThreshold": "tma_info_inst_mix_ipswpf < 100", @@ -1555,7 +1538,7 @@ }, { "BriefDescription": "Instruction per taken branch", - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN", "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB", "MetricName": "tma_info_inst_mix_iptb", "MetricThreshold": "tma_info_inst_mix_iptb < 13", @@ -1655,14 +1638,14 @@ }, { "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)", - "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY", + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY", "MetricGroup": "Mem;MemoryBound;MemoryLat", "MetricName": "tma_info_memory_load_miss_real_latency", "Unit": "cpu_core" }, { "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss", - "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES", "MetricGroup": "Mem;MemoryBW;MemoryBound", "MetricName": "tma_info_memory_mlp", "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", @@ -1670,28 +1653,28 @@ }, { "BriefDescription": "Average Parallel L2 cache miss data reads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", "MetricGroup": "Memory_BW;Offcore", "MetricName": "tma_info_memory_oro_data_l2_mlp", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD", "MetricGroup": "Memory_Lat;Offcore", "MetricName": "tma_info_memory_oro_load_l2_miss_latency", "Unit": "cpu_core" }, { "BriefDescription": "Average Parallel L2 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", "MetricGroup": "Memory_BW;Offcore", "MetricName": "tma_info_memory_oro_load_l2_mlp", "Unit": "cpu_core" }, { "BriefDescription": "Average Latency for L3 cache miss demand Loads", - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", "MetricGroup": "Memory_Lat;Offcore", "MetricName": "tma_info_memory_oro_load_l3_miss_latency", "Unit": "cpu_core" @@ -1755,14 +1738,14 @@ }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread", - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", "MetricGroup": "Cor;Pipeline;PortsUtil;SMT", "MetricName": "tma_info_pipeline_execute", "Unit": "cpu_core" }, { "BriefDescription": "Instructions per a microcode Assist invocation", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", "MetricGroup": "Pipeline;Ret;Retire", "MetricName": "tma_info_pipeline_ipassist", "MetricThreshold": "tma_info_pipeline_ipassist < 100e3", @@ -1778,7 +1761,7 @@ }, { "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", + "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", "MetricGroup": "Pipeline;Ret", "MetricName": "tma_info_pipeline_strings_cycles", "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1", @@ -1793,7 +1776,7 @@ }, { "BriefDescription": "Average CPU Utilization", - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC", "MetricGroup": "HPC;Summary", "MetricName": "tma_info_system_cpu_utilization", "Unit": "cpu_core" @@ -1816,7 +1799,7 @@ }, { "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", - "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", "MetricGroup": "Branches;OS", "MetricName": "tma_info_system_ipfarbranch", "MetricThreshold": "tma_info_system_ipfarbranch < 1e6", @@ -1847,6 +1830,7 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD", "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "tma_info_system_mem_read_latency", @@ -1855,6 +1839,7 @@ }, { "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL", "MetricGroup": "Mem;SoC", "MetricName": "tma_info_system_mem_request_latency", @@ -1897,7 +1882,7 @@ }, { "BriefDescription": "The ratio of Executed- by Issued-Uops", - "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY", "MetricGroup": "Cor;Pipeline", "MetricName": "tma_info_thread_execute_per_issue", "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage.", @@ -1905,7 +1890,7 @@ }, { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", - "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks", + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks", "MetricGroup": "Ret;Summary", "MetricName": "tma_info_thread_ipc", "Unit": "cpu_core" @@ -1972,7 +1957,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses", - "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks", + "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group", "MetricName": "tma_itlb_misses", "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -1992,7 +1977,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -2003,7 +1987,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -2024,7 +2007,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)", - "MetricExpr": "DECODE.LCP / tma_info_thread_clks", + "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks", "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", "MetricName": "tma_lcp", "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", @@ -2045,7 +2028,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations", - "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)", "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", "MetricName": "tma_load_op_utilization", "MetricThreshold": "tma_load_op_utilization > 0.6", @@ -2064,7 +2047,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk", - "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks", + "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks", "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", "MetricName": "tma_load_stlb_miss", "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", @@ -2073,7 +2056,6 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", @@ -2136,6 +2118,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", @@ -2145,7 +2128,6 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -2155,7 +2137,7 @@ }, { "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit", - "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots", + "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots", "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS", "MetricName": "tma_microcode_sequencer", "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1", @@ -2225,7 +2207,6 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -2246,7 +2227,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)", - "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks", "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_0", "MetricThreshold": "tma_port_0 > 0.6", @@ -2256,7 +2237,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)", - "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_1", "MetricThreshold": "tma_port_1 > 0.6", @@ -2266,7 +2247,7 @@ }, { "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)", - "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks", + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", "MetricName": "tma_port_6", "MetricThreshold": "tma_port_6 > 0.6", @@ -2296,7 +2277,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks", + "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_1", "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2306,7 +2287,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_2", "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2316,7 +2298,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", - "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_3m", "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", @@ -2338,7 +2321,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations", - "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks", + "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group", "MetricName": "tma_serializing_operation", "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))", @@ -2348,7 +2331,7 @@ }, { "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.", - "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)", + "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group", "MetricName": "tma_shuffles", "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)", @@ -2357,7 +2340,8 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", - "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_slow_pause", "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", @@ -2377,8 +2361,7 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", - "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", + "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", @@ -2398,7 +2381,7 @@ }, { "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write", - "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks", + "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_store_bound", "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", @@ -2408,7 +2391,6 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", @@ -2448,7 +2430,7 @@ }, { "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk", - "MetricExpr": "DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks", + "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks", "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", "MetricName": "tma_store_stlb_miss", "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", @@ -2467,7 +2449,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears", - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks", + "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks", "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group", "MetricName": "tma_unknown_branches", "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))", diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json index c150c14ac6ed9..a35edf7d86a97 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -195,7 +195,6 @@ }, { "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -457,7 +456,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -466,7 +464,6 @@ }, { "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -683,7 +680,6 @@ }, { "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index e31a4aac9f205..56e54babcc26f 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -400,7 +400,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_contested_accesses", @@ -421,7 +420,6 @@ }, { "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", "MetricName": "tma_data_sharing", @@ -449,7 +447,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)", "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_dram_bound", @@ -656,7 +653,6 @@ }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "Bad;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bad_spec_branch_misprediction_cost", @@ -699,7 +695,6 @@ }, { "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", "MetricGroup": "Cor;SMT", "MetricName": "tma_info_botlnk_l0_core_bound_likely", @@ -707,7 +702,6 @@ }, { "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", "MetricGroup": "DSBmiss;Fed;tma_issueFB", "MetricName": "tma_info_botlnk_l2_dsb_misses", @@ -716,7 +710,6 @@ }, { "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", "MetricName": "tma_info_botlnk_l2_ic_misses", @@ -725,7 +718,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", "MetricName": "tma_info_bottleneck_big_code", @@ -742,7 +734,6 @@ }, { "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", "MetricGroup": "Fed;FetchBW;Frontend", "MetricName": "tma_info_bottleneck_instruction_fetch_bw", @@ -750,7 +741,6 @@ }, { "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", "MetricName": "tma_info_bottleneck_memory_bandwidth", @@ -759,7 +749,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", "MetricName": "tma_info_bottleneck_memory_data_tlbs", @@ -768,7 +757,6 @@ }, { "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", "MetricName": "tma_info_bottleneck_memory_latency", @@ -777,7 +765,6 @@ }, { "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", "MetricName": "tma_info_bottleneck_mispredictions", @@ -1301,6 +1288,7 @@ }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", + "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)", "MetricGroup": "Mem;MemoryLat;SoC", "MetricName": "tma_info_system_mem_read_latency", @@ -1455,7 +1443,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l2_bound", @@ -1465,7 +1452,6 @@ }, { "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", "MetricName": "tma_l3_bound", @@ -1538,7 +1524,6 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", "MetricName": "tma_lock_latency", @@ -1596,6 +1581,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_memory_fence", @@ -1604,7 +1590,6 @@ }, { "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_memory_operations", @@ -1676,7 +1661,6 @@ }, { "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", - "MetricConstraint": "NO_GROUP_EVENTS", "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", "MetricName": "tma_other_light_ops", @@ -1758,6 +1742,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_2", @@ -1767,6 +1752,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", "MetricName": "tma_ports_utilized_3m", @@ -1822,6 +1808,7 @@ }, { "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", + "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", "MetricName": "tma_slow_pause", @@ -1840,7 +1827,6 @@ }, { "BriefDescription": "This metric represents rate of split store accesses", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", "MetricName": "tma_split_stores", @@ -1868,7 +1854,6 @@ }, { "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", - "MetricConstraint": "NO_GROUP_EVENTS_NMI", "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks", "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", "MetricName": "tma_store_fwd_blk", -- GitLab From 20d03ae36ec010aa97a97495d9dd9202cb93cb87 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Mon, 8 Jan 2024 18:57:20 +0530 Subject: [PATCH 0240/1405] usb: gadget: ncm: Fix indentations in documentation of NCM section Currently, the section of NCM which describes attributes are having wrong indentation. Fix this by following the correct format recommended. Fixes: 1900daeefd3e ("usb: gadget: ncm: Add support to update wMaxSegmentSize via configfs") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240108160221.743649b5@canb.auug.org.au/ Signed-off-by: Udipto Goswami Link: https://lore.kernel.org/r/20240108132720.7786-1-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget-testing.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/usb/gadget-testing.rst b/Documentation/usb/gadget-testing.rst index 8cd62c466d20a..077dfac7ed98f 100644 --- a/Documentation/usb/gadget-testing.rst +++ b/Documentation/usb/gadget-testing.rst @@ -448,17 +448,17 @@ Function-specific configfs interface The function name to use when creating the function directory is "ncm". The NCM function provides these attributes in its function directory: - =============== ================================================== - ifname network device interface name associated with this - function instance - qmult queue length multiplier for high and super speed - host_addr MAC address of host's end of this - Ethernet over USB link - dev_addr MAC address of device's end of this - Ethernet over USB link - max_segment_size Segment size required for P2P connections. This - will set MTU to (max_segment_size - 14 bytes) - =============== ================================================== + ======================= ================================================== + ifname network device interface name associated with this + function instance + qmult queue length multiplier for high and super speed + host_addr MAC address of host's end of this + Ethernet over USB link + dev_addr MAC address of device's end of this + Ethernet over USB link + max_segment_size Segment size required for P2P connections. This + will set MTU to 14 bytes + ======================= ================================================== and after creating the functions/ncm. they contain default values: qmult is 5, dev_addr and host_addr are randomly selected. -- GitLab From 817349b6d26aadd8b38283a05ce0bab106b4c765 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:15 +0530 Subject: [PATCH 0241/1405] usb: dwc3: host: Set XHCI_SG_TRB_CACHE_SIZE_QUIRK Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. But the support for this quirk isn't present in the DWC3 layer. We will encounter this XHCI timeout/hung issue if we run iperf loopback tests using RTL8156 ethernet adaptor on DWC3 targets with scatter-gather enabled. This gets resolved after enabling the XHCI_SG_TRB_CACHE_SIZE_QUIRK. This patch enables it using the xhci device property since its needed for DWC3 controller. In Synopsys DWC3 databook, Table 9-3: xHCI Debug Capability Limitations Chained TRBs greater than TRB cache size: The debug capability driver must not create a multi-TRB TD that describes smaller than a 1K packet that spreads across 8 or more TRBs on either the IN TR or the OUT TR. Cc: stable@vger.kernel.org #5.11 Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240116055816.1169821-2-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 61f57fe5bb783..43230915323c7 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -61,7 +61,7 @@ static int dwc3_host_get_irq(struct dwc3 *dwc) int dwc3_host_init(struct dwc3 *dwc) { - struct property_entry props[4]; + struct property_entry props[5]; struct platform_device *xhci; int ret, irq; int prop_idx = 0; @@ -89,6 +89,8 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); + props[prop_idx++] = PROPERTY_ENTRY_BOOL("xhci-sg-trb-cache-size-quirk"); + if (dwc->usb3_lpm_capable) props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); -- GitLab From 520b391e3e813c1dd142d1eebb3ccfa6d08c3995 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Tue, 16 Jan 2024 11:28:16 +0530 Subject: [PATCH 0242/1405] usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK Upstream commit bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") introduced a new quirk in XHCI which fixes XHC timeout, which was seen on synopsys XHCs while using SG buffers. Currently this quirk can only be set using xhci private data. But there are some drivers like dwc3/host.c which adds adds quirks using software node for xhci device. Hence set this xhci quirk by iterating over device properties. Cc: stable@vger.kernel.org # 5.11 Fixes: bac1ec551434 ("usb: xhci: Set quirk for XHCI_SG_TRB_CACHE_SIZE_QUIRK") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/20240116055816.1169821-3-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index f04fde19f5514..3d071b8753088 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -253,6 +253,9 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s if (device_property_read_bool(tmpdev, "quirk-broken-port-ped")) xhci->quirks |= XHCI_BROKEN_PORT_PED; + if (device_property_read_bool(tmpdev, "xhci-sg-trb-cache-size-quirk")) + xhci->quirks |= XHCI_SG_TRB_CACHE_SIZE_QUIRK; + device_property_read_u32(tmpdev, "imod-interval-ns", &xhci->imod_interval); } -- GitLab From 9dc292413c56a2d01e34787d3fc4a76635e4a498 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Thu, 18 Jan 2024 21:18:53 +0530 Subject: [PATCH 0243/1405] usb: gadget: ncm: Fix endianness of wMaxSegmentSize variable in ecm_desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent commit [1] added support for changing max segment size of the NCM interface via configfs. But the value of segment size value stored in ncm_opts need to be converted to little endian before saving it in ecm_desc. Also while initialising the value of segment size in opts during instance allocation, the value ETH_FRAME_LEN needs to be assigned directly without any conversion as ETH_FRAME_LEN and the variable max_segment_size are native endian. The current implementaion modifies it into little endian thus breaking things for big endian targets. Fix endianness while assigning these variables. While at it, fix up some stray spaces in comments added in code. [1]: https://lore.kernel.org/all/20231221153216.18657-1-quic_kriskura@quicinc.com/ Fixes: 1900daeefd3e ("usb: gadget: ncm: Add support to update wMaxSegmentSize via configfs") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240118154910.8765-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index a1575a0ca568d..ca5d5f5649982 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -105,8 +105,8 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) /* * Although max mtu as dictated by u_ether is 15412 bytes, setting - * max_segment_sizeto 15426 would not be efficient. If user chooses segment - * size to be (>= 8192), then we can't aggregate more than one buffer in each + * max_segment_size to 15426 would not be efficient. If user chooses segment + * size to be (>= 8192), then we can't aggregate more than one buffer in each * NTB (assuming each packet coming from network layer is >= 8192 bytes) as ep * maxpacket limit is 16384. So let max_segment_size be limited to 8000 to allow * at least 2 packets to be aggregated reducing wastage of NTB buffer space @@ -1489,7 +1489,7 @@ static int ncm_bind(struct usb_configuration *c, struct usb_function *f) ncm_data_intf.bInterfaceNumber = status; ncm_union_desc.bSlaveInterface0 = status; - ecm_desc.wMaxSegmentSize = ncm_opts->max_segment_size; + ecm_desc.wMaxSegmentSize = cpu_to_le16(ncm_opts->max_segment_size); status = -ENODEV; @@ -1685,7 +1685,7 @@ static struct usb_function_instance *ncm_alloc_inst(void) kfree(opts); return ERR_CAST(net); } - opts->max_segment_size = cpu_to_le16(ETH_FRAME_LEN); + opts->max_segment_size = ETH_FRAME_LEN; INIT_LIST_HEAD(&opts->ncm_os_desc.ext_prop); descs[0] = &opts->ncm_os_desc; -- GitLab From a54a594d72f25b08f39d743880a76721fba9ae77 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:34 +0200 Subject: [PATCH 0244/1405] xhci: fix possible null pointer dereference at secondary interrupter removal Don't try to remove a secondary interrupter that is known to be invalid. Also check if the interrupter is valid inside the spinlock that protects the array of interrupters. Found by smatch static checker Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/ffaa0a1b-5984-4a1f-bfd3-9184630a97b9@moroto.mountain/ Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 4460fa7e9fab9..d00d4d9372366 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1861,14 +1861,14 @@ void xhci_remove_secondary_interrupter(struct usb_hcd *hcd, struct xhci_interrup struct xhci_hcd *xhci = hcd_to_xhci(hcd); unsigned int intr_num; + spin_lock_irq(&xhci->lock); + /* interrupter 0 is primary interrupter, don't touch it */ - if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) + if (!ir || !ir->intr_num || ir->intr_num >= xhci->max_interrupters) { xhci_dbg(xhci, "Invalid secondary interrupter, can't remove\n"); - - /* fixme, should we check xhci->interrupter[intr_num] == ir */ - /* fixme locking */ - - spin_lock_irq(&xhci->lock); + spin_unlock_irq(&xhci->lock); + return; + } intr_num = ir->intr_num; -- GitLab From 09f197225cbc35db8ac135659cdd21bc1e29bda0 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:35 +0200 Subject: [PATCH 0245/1405] xhci: fix off by one check when adding a secondary interrupter. The sanity check of interrupter index when adding a new interrupter is off by one. intr_num needs to be smaller than xhci->max_interrupter to fit the array of interrupters. Luckily this doesn't cause any real word harm as xhci_add_interrupter() is always called with a intr_num value smaller than xhci->max_interrupters in any current kernel. Should not be needed for stable as 6.7 kernel and older only supports one interrupter, with intr_num always being zero. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-usb/e9771296-586d-456a-ac24-a82de79bb2e6@moroto.mountain/ Fixes: 4bf398e15aa4 ("xhci: split allocate interrupter into separate alloacte and add parts") Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d00d4d9372366..a7716202a8dd5 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2322,7 +2322,7 @@ xhci_add_interrupter(struct xhci_hcd *xhci, struct xhci_interrupter *ir, u64 erst_base; u32 erst_size; - if (intr_num > xhci->max_interrupters) { + if (intr_num >= xhci->max_interrupters) { xhci_warn(xhci, "Can't add interrupter %d, max interrupters %d\n", intr_num, xhci->max_interrupters); return -EINVAL; -- GitLab From 5372c65e1311a16351ef03dd096ff576e6477674 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 0246/1405] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last TRB of a isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 generate events for passed TRBs with IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event. If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while xHC host is already sending events for the next TD in the list. This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and event points to the next TD. In that case give back the fist TD and process the next TD normally Make sure TD status and transferred length stay valid in both cases with and without final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 33806ae966f90..41be7d31a36ea 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2376,6 +2376,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2401,8 +2404,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2422,6 +2426,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2430,6 +2437,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2808,17 +2823,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2830,9 +2879,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index a5c72a634e6a9..6f82d404883f9 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1549,6 +1549,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; -- GitLab From 7c4650ded49e5b88929ecbbb631efb8b0838e811 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 0247/1405] xhci: handle isoc Babble and Buffer Overrun events properly xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 41be7d31a36ea..f0d8a607ff214 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: -- GitLab From 12783c0b9e2c7915a50d5ec829630ff2da50472c Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Wed, 10 Jan 2024 15:28:14 +0530 Subject: [PATCH 0248/1405] usb: core: Prevent null pointer dereference in update_port_device_state Currently, the function update_port_device_state gets the usb_hub from udev->parent by calling usb_hub_to_struct_hub. However, in case the actconfig or the maxchild is 0, the usb_hub would be NULL and upon further accessing to get port_dev would result in null pointer dereference. Fix this by introducing an if check after the usb_hub is populated. Fixes: 83cb2604f641 ("usb: core: add sysfs entry for usb device state") Cc: stable@vger.kernel.org Signed-off-by: Udipto Goswami Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240110095814.7626-1-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index ffd7c99e24a36..48409d51ea438 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2053,9 +2053,19 @@ static void update_port_device_state(struct usb_device *udev) if (udev->parent) { hub = usb_hub_to_struct_hub(udev->parent); - port_dev = hub->ports[udev->portnum - 1]; - WRITE_ONCE(port_dev->state, udev->state); - sysfs_notify_dirent(port_dev->state_kn); + + /* + * The Link Layer Validation System Driver (lvstest) + * has a test step to unbind the hub before running the + * rest of the procedure. This triggers hub_disconnect + * which will set the hub's maxchild to 0, further + * resulting in usb_hub_to_struct_hub returning NULL. + */ + if (hub) { + port_dev = hub->ports[udev->portnum - 1]; + WRITE_ONCE(port_dev->state, udev->state); + sysfs_notify_dirent(port_dev->state_kn); + } } } -- GitLab From de4b5b28c87ccae4da268a53c5df135437f5cfde Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 15 Jan 2024 11:28:20 +0200 Subject: [PATCH 0249/1405] usb: dwc3: pci: add support for the Intel Arrow Lake-H This patch adds the necessary PCI ID for Intel Arrow Lake-H devices. Acked-by: Thinh Nguyen Signed-off-by: Heikki Krogerus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115092820.1454492-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 6604845c397cd..39564e17f3b07 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -51,6 +51,8 @@ #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 #define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e +#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1 +#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 #define PCI_DEVICE_ID_AMD_MR 0x163a @@ -421,6 +423,8 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) }, + { PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) }, { PCI_DEVICE_DATA(AMD, NL_USB, &dwc3_pci_amd_swnode) }, -- GitLab From 61a348857e869432e6a920ad8ea9132e8d44c316 Mon Sep 17 00:00:00 2001 From: Uttkarsh Aggarwal Date: Fri, 19 Jan 2024 15:18:25 +0530 Subject: [PATCH 0250/1405] usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend In current scenario if Plug-out and Plug-In performed continuously there could be a chance while checking for dwc->gadget_driver in dwc3_gadget_suspend, a NULL pointer dereference may occur. Call Stack: CPU1: CPU2: gadget_unbind_driver dwc3_suspend_common dwc3_gadget_stop dwc3_gadget_suspend dwc3_disconnect_gadget CPU1 basically clears the variable and CPU2 checks the variable. Consider CPU1 is running and right before gadget_driver is cleared and in parallel CPU2 executes dwc3_gadget_suspend where it finds dwc->gadget_driver which is not NULL and resumes execution and then CPU1 completes execution. CPU2 executes dwc3_disconnect_gadget where it checks dwc->gadget_driver is already NULL because of which the NULL pointer deference occur. Cc: stable@vger.kernel.org Fixes: 9772b47a4c29 ("usb: dwc3: gadget: Fix suspend/resume during device mode") Acked-by: Thinh Nguyen Signed-off-by: Uttkarsh Aggarwal Link: https://lore.kernel.org/r/20240119094825.26530-1-quic_uaggarwa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 019368f8e9c4c..564976b3e2b91 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4709,15 +4709,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc) unsigned long flags; int ret; - if (!dwc->gadget_driver) - return 0; - ret = dwc3_gadget_soft_disconnect(dwc); if (ret) goto err; spin_lock_irqsave(&dwc->lock, flags); - dwc3_disconnect_gadget(dwc); + if (dwc->gadget_driver) + dwc3_disconnect_gadget(dwc); spin_unlock_irqrestore(&dwc->lock, flags); return 0; -- GitLab From cc509b6a47e7c8998d9e41c273191299d5d9d631 Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 19 Jan 2024 20:35:37 +0800 Subject: [PATCH 0251/1405] usb: chipidea: core: handle power lost in workqueue When power is recycled in usb controller during system power management, the controller will recognize it and switch role if role has been changed during power lost. In current design, it will be completed in resume() function. However, this may bring issues since usb class devices have their pm operations too and these device's resume() functions are still not being called at this point. When usb controller recognized host role should be stopped, these usb class devices will be removed at this point. But these usb class devices can't be removed in some cases, such as scsi devices. Since scsi driver may sync data to U-disk, however it will block there because scsi drvier can only handle pm request when is in suspended state. Therefore, there may exist a dependency between ci_resume() and usb class device's resume(). To break this potential dependency, we need to handle power lost work in a workqueue. Fixes: 74494b33211d ("usb: chipidea: core: add controller resume support when controller is powered off") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20240119123537.3614838-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci.h | 2 ++ drivers/usb/chipidea/core.c | 44 ++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h index d9bb3d3f026e6..2a38e1eb65466 100644 --- a/drivers/usb/chipidea/ci.h +++ b/drivers/usb/chipidea/ci.h @@ -176,6 +176,7 @@ struct hw_bank { * @enabled_otg_timer_bits: bits of enabled otg timers * @next_otg_timer: next nearest enabled timer to be expired * @work: work for role changing + * @power_lost_work: work for power lost handling * @wq: workqueue thread * @qh_pool: allocation pool for queue heads * @td_pool: allocation pool for transfer descriptors @@ -226,6 +227,7 @@ struct ci_hdrc { enum otg_fsm_timer next_otg_timer; struct usb_role_switch *role_switch; struct work_struct work; + struct work_struct power_lost_work; struct workqueue_struct *wq; struct dma_pool *qh_pool; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 41014f93cfdf3..835bf2428dc6e 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -856,6 +856,27 @@ static int ci_extcon_register(struct ci_hdrc *ci) return 0; } +static void ci_power_lost_work(struct work_struct *work) +{ + struct ci_hdrc *ci = container_of(work, struct ci_hdrc, power_lost_work); + enum ci_role role; + + disable_irq_nosync(ci->irq); + pm_runtime_get_sync(ci->dev); + if (!ci_otg_is_fsm_mode(ci)) { + role = ci_get_role(ci); + + if (ci->role != role) { + ci_handle_id_switch(ci); + } else if (role == CI_ROLE_GADGET) { + if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) + usb_gadget_vbus_connect(&ci->gadget); + } + } + pm_runtime_put_sync(ci->dev); + enable_irq(ci->irq); +} + static DEFINE_IDA(ci_ida); struct platform_device *ci_hdrc_add_device(struct device *dev, @@ -1045,6 +1066,8 @@ static int ci_hdrc_probe(struct platform_device *pdev) spin_lock_init(&ci->lock); mutex_init(&ci->mutex); + INIT_WORK(&ci->power_lost_work, ci_power_lost_work); + ci->dev = dev; ci->platdata = dev_get_platdata(dev); ci->imx28_write_fix = !!(ci->platdata->flags & @@ -1396,25 +1419,6 @@ static int ci_suspend(struct device *dev) return 0; } -static void ci_handle_power_lost(struct ci_hdrc *ci) -{ - enum ci_role role; - - disable_irq_nosync(ci->irq); - if (!ci_otg_is_fsm_mode(ci)) { - role = ci_get_role(ci); - - if (ci->role != role) { - ci_handle_id_switch(ci); - } else if (role == CI_ROLE_GADGET) { - if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV)) - usb_gadget_vbus_connect(&ci->gadget); - } - } - - enable_irq(ci->irq); -} - static int ci_resume(struct device *dev) { struct ci_hdrc *ci = dev_get_drvdata(dev); @@ -1446,7 +1450,7 @@ static int ci_resume(struct device *dev) ci_role(ci)->resume(ci, power_lost); if (power_lost) - ci_handle_power_lost(ci); + queue_work(system_freezable_wq, &ci->power_lost_work); if (ci->supports_runtime_pm) { pm_runtime_disable(dev); -- GitLab From f17c34ffc792bbb520e4b61baa16b6cfc7d44b13 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 Jan 2024 16:35:32 +0100 Subject: [PATCH 0252/1405] USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OTG 1.3 spec has the feature A_ALT_HNP_SUPPORT, which tells a device that it is connected to the wrong port. Some devices refuse to operate if you enable that feature, because it indicates to them that they ought to request to be connected to another port. According to the spec this feature may be used based only the following three conditions: 6.5.3 a_alt_hnp_support Setting this feature indicates to the B-device that it is connected to an A-device port that is not capable of HNP, but that the A-device does have an alternate port that is capable of HNP. The A-device is required to set this feature under the following conditions: • the A-device has multiple receptacles • the A-device port that connects to the B-device does not support HNP • the A-device has another port that does support HNP A check for the third and first condition is missing. Add it. Signed-off-by: Oliver Neukum Cc: stable Fixes: 7d2d641c44269 ("usb: otg: don't set a_alt_hnp_support feature for OTG 2.0 device") Link: https://lore.kernel.org/r/20240122153545.12284-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 48409d51ea438..e38a4124f6102 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2398,17 +2398,25 @@ static int usb_enumerate_device_otg(struct usb_device *udev) } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { - /* Set a_alt_hnp_support for legacy otg device */ - err = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - USB_REQ_SET_FEATURE, 0, - USB_DEVICE_A_ALT_HNP_SUPPORT, - 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); - if (err < 0) - dev_err(&udev->dev, - "set a_alt_hnp_support failed: %d\n", - err); + /* + * We are operating on a legacy OTP device + * These should be told that they are operating + * on the wrong port if we have another port that does + * support HNP + */ + if (bus->otg_port != 0) { + /* Set a_alt_hnp_support for legacy otg device */ + err = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + USB_REQ_SET_FEATURE, 0, + USB_DEVICE_A_ALT_HNP_SUPPORT, + 0, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (err < 0) + dev_err(&udev->dev, + "set a_alt_hnp_support failed: %d\n", + err); + } } } #endif -- GitLab From b2d2d7ea0dd09802cf5a0545bf54d8ad8987d20c Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Tue, 23 Jan 2024 11:48:29 +0800 Subject: [PATCH 0253/1405] usb: f_mass_storage: forbid async queue when shutdown happen When write UDC to empty and unbind gadget driver from gadget device, it is possible that there are many queue failures for mass storage function. The root cause is mass storage main thread alaways try to queue request to receive a command from host if running flag is on, on platform like dwc3, if pull down called, it will not queue request again and return -ESHUTDOWN, but it not affect running flag of mass storage function. Check return code from mass storage function and clear running flag if it is -ESHUTDOWN, also indicate start in/out transfer failure to break loops. Cc: stable Signed-off-by: yuan linyu Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20240123034829.3848409-1-yuanlinyu@hihonor.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 722a3ab2b3379..c265a1f62fc14 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -545,21 +545,37 @@ static int start_transfer(struct fsg_dev *fsg, struct usb_ep *ep, static bool start_in_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_SENDING; - if (start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_in, bh->inreq); + if (rc) { bh->state = BUF_STATE_EMPTY; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } static bool start_out_transfer(struct fsg_common *common, struct fsg_buffhd *bh) { + int rc; + if (!fsg_is_set(common)) return false; bh->state = BUF_STATE_RECEIVING; - if (start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq)) + rc = start_transfer(common->fsg, common->fsg->bulk_out, bh->outreq); + if (rc) { bh->state = BUF_STATE_FULL; + if (rc == -ESHUTDOWN) { + common->running = 0; + return false; + } + } return true; } -- GitLab From 032178972f8e992b90f9794a13265fec8c8314b0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Jan 2024 16:30:08 -0800 Subject: [PATCH 0254/1405] usb: gadget: pch_udc: fix an Excess kernel-doc warning Delete one extraneous line of kernel-doc to prevent a kernel-doc warning: pch_udc.c:297: warning: Excess struct member 'desc' description in 'pch_udc_ep' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: tomoya-linux@dsn.lapis-semi.com Link: https://lore.kernel.org/r/20240115003008.5763-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 4f8617210d852..169f72665739f 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -274,7 +274,6 @@ struct pch_udc_cfg_data { * @td_data: for data request * @dev: reference to device struct * @offset_addr: offset address of ep register - * @desc: for this ep * @queue: queue for requests * @num: endpoint number * @in: endpoint is IN -- GitLab From b717dfbf73e842d15174699fe2c6ee4fdde8aa1f Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 17 Jan 2024 11:47:42 +0000 Subject: [PATCH 0255/1405] Revert "usb: typec: tcpm: fix cc role at port reset" This reverts commit 1e35f074399dece73d5df11847d4a0d7a6f49434. Given that ERROR_RECOVERY calls into PORT_RESET for Hi-Zing the CC pins, setting CC pins to default state during PORT_RESET breaks error recovery. 4.5.2.2.2.1 ErrorRecovery State Requirements The port shall not drive VBUS or VCONN, and shall present a high-impedance to ground (above zOPEN) on its CC1 and CC2 pins. Hi-Zing the CC pins is the inteded behavior for PORT_RESET. CC pins are set to default state after tErrorRecovery in PORT_RESET_WAIT_OFF. 4.5.2.2.2.2 Exiting From ErrorRecovery State A Sink shall transition to Unattached.SNK after tErrorRecovery. A Source shall transition to Unattached.SRC after tErrorRecovery. Cc: stable@vger.kernel.org Cc: Frank Wang Fixes: 1e35f074399d ("usb: typec: tcpm: fix cc role at port reset") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240117114742.2587779-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 5945e3a2b0f78..9d410718eaf47 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4876,8 +4876,7 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? - TYPEC_CC_RD : tcpm_rp_cc(port)); + tcpm_set_cc(port, TYPEC_CC_OPEN); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; -- GitLab From 3caf2b2ad7334ef35f55b95f3e1b138c6f77b368 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 26 Jan 2024 17:38:00 -0500 Subject: [PATCH 0256/1405] usb: ulpi: Fix debugfs directory leak The ULPI per-device debugfs root is named after the ulpi device's parent, but ulpi_unregister_interface tries to remove a debugfs directory named after the ulpi device itself. This results in the directory sticking around and preventing subsequent (deferred) probes from succeeding. Change the directory name to match the ulpi device. Fixes: bd0a0a024f2a ("usb: ulpi: Add debugfs support") Cc: stable@vger.kernel.org Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240126223800.2864613-1-sean.anderson@seco.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 84d91b1c1eed5..0886b19d2e1c8 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -301,7 +301,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", -- GitLab From c9aed03a0a683fd1600ea92f2ad32232d4736272 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:21 +0100 Subject: [PATCH 0257/1405] usb: ucsi: Add missing ppm_lock Calling ->sync_write must be done while holding the PPM lock as the mailbox logic does not support concurrent commands. At least since the addition of partner task this means that ucsi_acknowledge_connector_change should be called with the PPM lock held as it calls ->sync_write. Thus protect the only call to ucsi_acknowledge_connector_change with the PPM. All other calls to ->sync_write already happen under the PPM lock. Fixes: b9aa02ca39a4 ("usb: typec: ucsi: Add polling mechanism for partner tasks like alt mode checking") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-2-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 5392ec6989592..14f5a7bfae2e9 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -938,7 +938,9 @@ static void ucsi_handle_connector_change(struct work_struct *work) clear_bit(EVENT_PENDING, &con->ucsi->flags); + mutex_lock(&ucsi->ppm_lock); ret = ucsi_acknowledge_connector_change(ucsi); + mutex_unlock(&ucsi->ppm_lock); if (ret) dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); -- GitLab From 2840143e393a4ddc1caab4372969ea337371168c Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:22 +0100 Subject: [PATCH 0258/1405] usb: ucsi_acpi: Fix command completion handling In case of a spurious or otherwise delayed notification it is possible that CCI still reports the previous completion. The UCSI spec is aware of this and provides two completion bits in CCI, one for normal commands and one for acks. As acks and commands alternate the notification handler can determine if the completion bit is from the current command. The initial UCSI code correctly handled this but the distinction between the two completion bits was lost with the introduction of the new API. To fix this revive the ACK_PENDING bit for ucsi_acpi and only complete commands if the completion bit matches. Fixes: f56de278e8ec ("usb: typec: ucsi: acpi: Move to the new API") Cc: stable@vger.kernel.org Signed-off-by: "Christian A. Ehrhardt" Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-3-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 6bbf490ac4010..fa222080887d5 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -73,9 +73,13 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, const void *val, size_t val_len) { struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + bool ack = UCSI_COMMAND(*(u64 *)val) == UCSI_ACK_CC_CI; int ret; - set_bit(COMMAND_PENDING, &ua->flags); + if (ack) + set_bit(ACK_PENDING, &ua->flags); + else + set_bit(COMMAND_PENDING, &ua->flags); ret = ucsi_acpi_async_write(ucsi, offset, val, val_len); if (ret) @@ -85,7 +89,10 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset, ret = -ETIMEDOUT; out_clear_bit: - clear_bit(COMMAND_PENDING, &ua->flags); + if (ack) + clear_bit(ACK_PENDING, &ua->flags); + else + clear_bit(COMMAND_PENDING, &ua->flags); return ret; } @@ -142,8 +149,10 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (UCSI_CCI_CONNECTOR(cci)) ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); - if (test_bit(COMMAND_PENDING, &ua->flags) && - cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) + if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags)) + complete(&ua->complete); + if (cci & UCSI_CCI_COMMAND_COMPLETE && + test_bit(COMMAND_PENDING, &ua->flags)) complete(&ua->complete); } -- GitLab From f3be347ea42dbb0358cd8b2d8dc543a23b70a976 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Sun, 21 Jan 2024 21:41:23 +0100 Subject: [PATCH 0259/1405] usb: ucsi_acpi: Quirk to ack a connector change ack cmd The PPM on some Dell laptops seems to expect that the ACK_CC_CI command to clear the connector change notification is in turn followed by another ACK_CC_CI to acknowledge the ACK_CC_CI command itself. This is in violation of the UCSI spec that states: "The only notification that is not acknowledged by the OPM is the command completion notification for the ACK_CC_CI or the PPM_RESET command." Add a quirk to send this ack anyway. Apply the quirk to all Dell systems. On the first command that acks a connector change send a dummy command to determine if it runs into a timeout. Only activate the quirk if it does. This ensure that we do not break Dell systems that do not need the quirk. Signed-off-by: "Christian A. Ehrhardt" Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240121204123.275441-4-lk@c--e.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 71 ++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index fa222080887d5..928eacbeb21ac 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -25,6 +25,8 @@ struct ucsi_acpi { unsigned long flags; guid_t guid; u64 cmd; + bool dell_quirk_probed; + bool dell_quirk_active; }; static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func) @@ -126,12 +128,73 @@ static const struct ucsi_operations ucsi_zenbook_ops = { .async_write = ucsi_acpi_async_write }; -static const struct dmi_system_id zenbook_dmi_id[] = { +/* + * Some Dell laptops expect that an ACK command with the + * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate) + * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set. + * If this is not done events are not delivered to OSPM and + * subsequent commands will timeout. + */ +static int +ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset, + const void *val, size_t val_len) +{ + struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi); + u64 cmd = *(u64 *)val, ack = 0; + int ret; + + if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI && + cmd & UCSI_ACK_CONNECTOR_CHANGE) + ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + + ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len); + if (ret != 0) + return ret; + if (ack == 0) + return ret; + + if (!ua->dell_quirk_probed) { + ua->dell_quirk_probed = true; + + cmd = UCSI_GET_CAPABILITY; + ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, + sizeof(cmd)); + if (ret == 0) + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, + &ack, sizeof(ack)); + if (ret != -ETIMEDOUT) + return ret; + + ua->dell_quirk_active = true; + dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n"); + dev_err(ua->dev, "Firmware bug: Enabling workaround\n"); + } + + if (!ua->dell_quirk_active) + return ret; + + return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack)); +} + +static const struct ucsi_operations ucsi_dell_ops = { + .read = ucsi_acpi_read, + .sync_write = ucsi_dell_sync_write, + .async_write = ucsi_acpi_async_write +}; + +static const struct dmi_system_id ucsi_acpi_quirks[] = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), }, + .driver_data = (void *)&ucsi_zenbook_ops, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + }, + .driver_data = (void *)&ucsi_dell_ops, }, { } }; @@ -160,6 +223,7 @@ static int ucsi_acpi_probe(struct platform_device *pdev) { struct acpi_device *adev = ACPI_COMPANION(&pdev->dev); const struct ucsi_operations *ops = &ucsi_acpi_ops; + const struct dmi_system_id *id; struct ucsi_acpi *ua; struct resource *res; acpi_status status; @@ -189,8 +253,9 @@ static int ucsi_acpi_probe(struct platform_device *pdev) init_completion(&ua->complete); ua->dev = &pdev->dev; - if (dmi_check_system(zenbook_dmi_id)) - ops = &ucsi_zenbook_ops; + id = dmi_first_match(ucsi_acpi_quirks); + if (id) + ops = id->driver_data; ua->ucsi = ucsi_create(&pdev->dev, ops); if (IS_ERR(ua->ucsi)) -- GitLab From f2e5d3de7e1fbf24483e7f996e519b3ebc3935a1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 13 Jan 2024 22:55:48 +0200 Subject: [PATCH 0260/1405] usb: typec: tcpm: fix the PD disabled case If the PD is disabled for the port, port->pds will be left as NULL, which causes the following crash during caps intilisation. Fix the crash. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: tcpm_register_port+0xaec/0xc44 qcom_pmic_typec_probe+0x1a4/0x254 platform_probe+0x68/0xc0 really_probe+0x148/0x2ac __driver_probe_device+0x78/0x12c driver_probe_device+0xd8/0x160 Bluetooth: hci0: QCA Product ID :0x0000000a __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x80/0xdc Bluetooth: hci0: QCA SOC Version :0x40020150 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x1ec/0x51c worker_thread+0x1ec/0x3e4 kthread+0x120/0x124 ret_from_fork+0x10/0x20 Fixes: cd099cde4ed2 ("usb: typec: tcpm: Support multiple capabilities") Signed-off-by: Dmitry Baryshkov Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240113-pmi632-typec-v2-5-182d9aa0a5b3@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 9d410718eaf47..f7d7daa60c8dc 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -6847,7 +6847,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) if (err) goto out_role_sw_put; - port->typec_caps.pd = port->pds[0]; + if (port->pds) + port->typec_caps.pd = port->pds[0]; port->typec_port = typec_register_port(port->dev, &port->typec_caps); if (IS_ERR(port->typec_port)) { -- GitLab From e15c99be0c915bbe70dfe55450d268d7bd5bdac8 Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Fri, 19 Jan 2024 12:35:16 +0100 Subject: [PATCH 0261/1405] tty: serial: Fix bit order in RS485 flag definitions Since the commit 93f3350c46fa ("RS485: fix inconsistencies in the meaning of some variables"), the definition for bit 3 has been removed. But with the switch to bit shift macros in commit 76ac8e29855b ("tty: serial: Cleanup the bit shift with macro"), this gap wasn't preserved. To avoid a break in user/kernel api of the system skip bit 3 again and add a placeholder comment. Signed-off-by: Christoph Niedermaier Fixes: 76ac8e29855b ("tty: serial: Cleanup the bit shift with macro") Fixes: 6056f20f27e9 ("tty: serial: Add RS422 flag to struct serial_rs485") Reviewed-by: Jiri Slaby Cc: Greg Kroah-Hartman Cc: Crescent CY Hsieh Cc: Jiri Slaby Cc: Lukas Wunner Cc: Lino Sanfilippo Cc: Hugo Villeneuve Link: https://lore.kernel.org/r/20240119113516.2944-1-cniedermaier@dh-electronics.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 9086367db0435..de9b4733607e6 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -145,12 +145,13 @@ struct serial_rs485 { #define SER_RS485_ENABLED _BITUL(0) #define SER_RS485_RTS_ON_SEND _BITUL(1) #define SER_RS485_RTS_AFTER_SEND _BITUL(2) -#define SER_RS485_RX_DURING_TX _BITUL(3) -#define SER_RS485_TERMINATE_BUS _BITUL(4) -#define SER_RS485_ADDRB _BITUL(5) -#define SER_RS485_ADDR_RECV _BITUL(6) -#define SER_RS485_ADDR_DEST _BITUL(7) -#define SER_RS485_MODE_RS422 _BITUL(8) +/* Placeholder for bit 3: SER_RS485_RTS_BEFORE_SEND, which isn't used anymore */ +#define SER_RS485_RX_DURING_TX _BITUL(4) +#define SER_RS485_TERMINATE_BUS _BITUL(5) +#define SER_RS485_ADDRB _BITUL(6) +#define SER_RS485_ADDR_RECV _BITUL(7) +#define SER_RS485_ADDR_DEST _BITUL(8) +#define SER_RS485_MODE_RS422 _BITUL(9) __u32 delay_rts_before_send; __u32 delay_rts_after_send; -- GitLab From 86ee55e9bc7ff7308eda67e5bcb3f8c834c7c113 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Jan 2024 22:25:21 +0300 Subject: [PATCH 0262/1405] serial: 8250_pci1xxxx: fix off by one in pci1xxxx_process_read_data() These > comparisons should be >= to prevent writing one element beyond the end of the rx_buff[] array. The rx_buff[] buffer has RX_BUF_SIZE elements. Fix the buffer overflow. Fixes: aba8290f368d ("8250: microchip: pci1xxxx: Add Burst mode reception support in uart driver for writing into FIFO") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ZZ7vIfj7Jgh-pJn8@moroto Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci1xxxx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c index 558c4c7f3104e..cd258922bd780 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -302,7 +302,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, * to read, the data is received one byte at a time. */ while (valid_burst_count--) { - if (*buff_index > (RX_BUF_SIZE - UART_BURST_SIZE)) + if (*buff_index >= (RX_BUF_SIZE - UART_BURST_SIZE)) break; burst_buf = (u32 *)&rx_buff[*buff_index]; *burst_buf = readl(port->membase + UART_RX_BURST_FIFO); @@ -311,7 +311,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, } while (*valid_byte_count) { - if (*buff_index > RX_BUF_SIZE) + if (*buff_index >= RX_BUF_SIZE) break; rx_buff[*buff_index] = readb(port->membase + UART_RX_BYTE_FIFO); -- GitLab From 30926783a46841c2d1bbf3f74067ba85d304fd0d Mon Sep 17 00:00:00 2001 From: Gui-Dong Han <2045gemini@gmail.com> Date: Fri, 12 Jan 2024 19:36:24 +0800 Subject: [PATCH 0263/1405] serial: core: Fix atomicity violation in uart_tiocmget In uart_tiocmget(): result = uport->mctrl; uart_port_lock_irq(uport); result |= uport->ops->get_mctrl(uport); uart_port_unlock_irq(uport); ... return result; In uart_update_mctrl(): uart_port_lock_irqsave(port, &flags); ... port->mctrl = (old & ~clear) | set; ... port->ops->set_mctrl(port, port->mctrl); ... uart_port_unlock_irqrestore(port, flags); An atomicity violation is identified due to the concurrent execution of uart_tiocmget() and uart_update_mctrl(). After assigning result = uport->mctrl, the mctrl value may change in uart_update_mctrl(), leading to a mismatch between the value returned by uport->ops->get_mctrl(uport) and the mctrl value previously read. This can result in uart_tiocmget() returning an incorrect value. This possible bug is found by an experimental static analysis tool developed by our team, BassCheck[1]. This tool analyzes the locking APIs to extract function pairs that can be concurrently executed, and then analyzes the instructions in the paired functions to identify possible concurrency bugs including data races and atomicity violations. The above possible bug is reported when our tool analyzes the source code of Linux 5.17. To address this issue, it is suggested to move the line result = uport->mctrl inside the uart_port_lock block to ensure atomicity and prevent the mctrl value from being altered during the execution of uart_tiocmget(). With this patch applied, our tool no longer reports the bug, with the kernel configuration allyesconfig for x86_64. Due to the absence of the requisite hardware, we are unable to conduct runtime testing of the patch. Therefore, our verification is solely based on code logic analysis. [1] https://sites.google.com/view/basscheck/ Fixes: c5f4644e6c8b ("[PATCH] Serial: Adjust serial locking") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Link: https://lore.kernel.org/r/20240112113624.17048-1-2045gemini@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index b56ed8c376b22..d6a58a9e072a1 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1084,8 +1084,8 @@ static int uart_tiocmget(struct tty_struct *tty) goto out; if (!tty_io_error(tty)) { - result = uport->mctrl; uart_port_lock_irq(uport); + result = uport->mctrl; result |= uport->ops->get_mctrl(uport); uart_port_unlock_irq(uport); } -- GitLab From 0419373333c2f2024966d36261fd82a453281e80 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:58 -0500 Subject: [PATCH 0264/1405] serial: max310x: set default value when reading clock ready bit If regmap_read() returns a non-zero value, the 'val' variable can be left uninitialized. Clear it before calling regmap_read() to make sure we properly detect the clock ready bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-2-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index f3a99daebdaa0..b2c753ba9cbfd 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -641,7 +641,7 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val; + unsigned int val = 0; msleep(10); regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); if (!(val & MAX310X_STS_CLKREADY_BIT)) { -- GitLab From 93cd256ab224c2519e7c4e5f58bb4f1ac2bf0965 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:29:59 -0500 Subject: [PATCH 0265/1405] serial: max310x: improve crystal stable clock detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some people are seeing a warning similar to this when using a crystal: max310x 11-006c: clock is not stable yet The datasheet doesn't mention the maximum time to wait for the clock to be stable when using a crystal, and it seems that the 10ms delay in the driver is not always sufficient. Jan Kundrát reported that it took three tries (each separated by 10ms) to get a stable clock. Modify behavior to check stable clock ready bit multiple times (20), and waiting 10ms between each try. Note: the first draft of the driver originally used a 50ms delay, without checking the clock stable bit. Then a loop with 1000 retries was implemented, each time reading the clock stable bit. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Link: https://lore.kernel.org/all/20240110174015.6f20195fde08e5c9e64e5675@hugovil.com/raw Link: https://github.com/boundarydevices/linux/commit/e5dfe3e4a751392515d78051973190301a37ca9a Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-3-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index b2c753ba9cbfd..c0eb0615d945b 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Crystal-related definitions */ +#define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ + /* MAX3107 specific */ #define MAX3107_REV_ID (0xa0) @@ -641,12 +645,19 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, /* Wait for crystal */ if (xtal) { - unsigned int val = 0; - msleep(10); - regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); - if (!(val & MAX310X_STS_CLKREADY_BIT)) { + bool stable = false; + unsigned int try = 0, val = 0; + + do { + msleep(MAX310X_XTAL_WAIT_DELAY_MS); + regmap_read(s->regmap, MAX310X_STS_IRQSTS_REG, &val); + + if (val & MAX310X_STS_CLKREADY_BIT) + stable = true; + } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); + + if (!stable) dev_warn(dev, "clock is not stable yet\n"); - } } return bestfreq; -- GitLab From 8afa6c6decea37e7cb473d2c60473f37f46cea35 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:00 -0500 Subject: [PATCH 0266/1405] serial: max310x: fail probe if clock crystal is unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A stable clock is really required in order to use this UART, so log an error message and bail out if the chip reports that the clock is not stable. Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index c0eb0615d945b..552e153a24e05 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -587,7 +587,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -657,7 +657,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); if (!stable) - dev_warn(dev, "clock is not stable yet\n"); + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1282,7 +1283,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; for (i = 0; i < devtype->nr; i++) @@ -1360,6 +1361,11 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { -- GitLab From b35f8dbbce818b02c730dc85133dc7754266e084 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:01 -0500 Subject: [PATCH 0267/1405] serial: max310x: prevent infinite while() loop in port startup If there is a problem after resetting a port, the do/while() loop that checks the default value of DIVLSB register may run forever and spam the I2C bus. Add a delay before each read of DIVLSB, and a maximum number of tries to prevent that situation from happening. Also fail probe if port reset is unsuccessful. Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 552e153a24e05..10bf6d75bf9ee 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -237,6 +237,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + /* Crystal-related definitions */ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ @@ -1346,6 +1350,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1354,8 +1361,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } -- GitLab From 2751153b9945c31eb905deb9fbe2d7f127b4b34c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Dec 2023 21:20:50 +0100 Subject: [PATCH 0268/1405] parisc: Make RO_DATA page aligned in vmlinux.lds.S The rodata_test program for CONFIG_DEBUG_RODATA_TEST=y complains if read-only data does not start at page boundary. Signed-off-by: Helge Deller --- arch/parisc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 548051b0b4aff..b445e47903cfd 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -127,7 +127,7 @@ SECTIONS } #endif - RO_DATA(8) + RO_DATA(PAGE_SIZE) /* unwind info */ . = ALIGN(4); -- GitLab From b9402e3b97289ca9e0f0f79f4df64bd6c9176a86 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 17 Jan 2024 17:46:43 +0100 Subject: [PATCH 0269/1405] parisc: Check for valid stride size for cache flushes Report if the calculated cache stride size is zero, otherwise the cache flushing routine will never finish and hang the machine. This can be reproduced with a testcase in qemu, where the firmware reports wrong cache values. Signed-off-by: Helge Deller --- arch/parisc/kernel/cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 268d90a9325b4..0c015487e5db6 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -264,6 +264,10 @@ parisc_cache_init(void) icache_stride = CAFL_STRIDE(cache_info.ic_conf); #undef CAFL_STRIDE + /* stride needs to be non-zero, otherwise cache flushes will not work */ + WARN_ON(cache_info.dc_size && dcache_stride == 0); + WARN_ON(cache_info.ic_size && icache_stride == 0); + if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) == PDC_MODEL_NVA_UNSUPPORTED) { printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n"); -- GitLab From c8708d758e715c3824a73bf0cda97292b52be44d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 19 Jan 2024 21:16:39 +0100 Subject: [PATCH 0270/1405] parisc: Prevent hung tasks when printing inventory on serial console Printing the inventory on a serial console can be quite slow and thus may trigger the hung task detector (CONFIG_DETECT_HUNG_TASK=y) and possibly reboot the machine. Adding a cond_resched() prevents this. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/kernel/drivers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 25f9b9e9d6dfb..404ea37705ce6 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); -- GitLab From 20e08a720cc526dacc0678067ffc81613aa738ca Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 22 Jan 2024 17:51:15 +0100 Subject: [PATCH 0271/1405] parisc: Drop unneeded semicolon in parse_tree_node() Reported-by: kernel test robot Signed-off-by: Helge Deller Closes: https://lore.kernel.org/oe-kbuild-all/202401222059.Wli6OGT0-lkp@intel.com/ --- arch/parisc/kernel/drivers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 404ea37705ce6..c7ff339732ba5 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) }; if (device_for_each_child(parent, &recurse_data, descend_children)) - { /* nothing */ }; + { /* nothing */ } return d.dev; } -- GitLab From 6f6c72acddf4357fcc83593c20ef9064fb42db92 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 27 Jan 2024 21:02:08 +0100 Subject: [PATCH 0272/1405] iio: move LIGHT_UVA and LIGHT_UVB to the end of iio_modifier The new modifiers should have added to the end of the enum, so they do not affect the existing entries. No modifiers were added since then, so they can be moved safely to the end of the list. Move IIO_MOD_LIGHT_UVA and IIO_MOD_LIGHT_UVB to the end of iio_modifier. Fixes: b89710bd215e ("iio: add modifiers for A and B ultraviolet light") Suggested-by: Paul Cercueil Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20240127200208.185815-1-javier.carrasco.cruz@gmail.com Signed-off-by: Jonathan Cameron --- include/uapi/linux/iio/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 5060963707b1e..f2e0b2d50e6b5 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -91,8 +91,6 @@ enum iio_modifier { IIO_MOD_CO2, IIO_MOD_VOC, IIO_MOD_LIGHT_UV, - IIO_MOD_LIGHT_UVA, - IIO_MOD_LIGHT_UVB, IIO_MOD_LIGHT_DUV, IIO_MOD_PM1, IIO_MOD_PM2P5, @@ -107,6 +105,8 @@ enum iio_modifier { IIO_MOD_PITCH, IIO_MOD_YAW, IIO_MOD_ROLL, + IIO_MOD_LIGHT_UVA, + IIO_MOD_LIGHT_UVB, }; enum iio_event_type { -- GitLab From 29142dc92c37d3259a33aef15b03e6ee25b0d188 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 27 Jan 2024 13:21:14 -0800 Subject: [PATCH 0273/1405] tracefs: remove stale 'update_gid' code The 'eventfs_update_gid()' function is no longer called, so remove it (and the helper function it uses). Link: https://lore.kernel.org/all/CAHk-=wj+DsZZ=2iTUkJ-Nojs9fjYMvPs1NuoM3yK7aTDtJfPYQ@mail.gmail.com/ Fixes: 8186fff7ab64 ("tracefs/eventfs: Use root and instance inodes as default ownership") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 38 -------------------------------------- fs/tracefs/internal.h | 1 - 2 files changed, 39 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6b211522a13ec..1c3dd0ad4660e 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -281,44 +281,6 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, inode->i_gid = attr->gid; } -static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) -{ - struct eventfs_inode *ei_child; - - /* at most we have events/system/event */ - if (WARN_ON_ONCE(level > 3)) - return; - - ei->attr.gid = gid; - - if (ei->entry_attrs) { - for (int i = 0; i < ei->nr_entries; i++) { - ei->entry_attrs[i].gid = gid; - } - } - - /* - * Only eventfs_inode with dentries are updated, make sure - * all eventfs_inodes are updated. If one of the children - * do not have a dentry, this function must traverse it. - */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - if (!ei_child->dentry) - update_gid(ei_child, gid, level + 1); - } -} - -void eventfs_update_gid(struct dentry *dentry, kgid_t gid) -{ - struct eventfs_inode *ei = dentry->d_fsdata; - int idx; - - idx = srcu_read_lock(&eventfs_srcu); - update_gid(ei, gid, 0); - srcu_read_unlock(&eventfs_srcu, idx); -} - /** * create_file - create a file in the tracefs filesystem * @name: the name of the file to create. diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 45397df9bb65b..91c2bf0b91d9c 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_update_gid(struct dentry *dentry, kgid_t gid); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ -- GitLab From eba38cc7578bef94865341c73608bdf49193a51d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 28 Jan 2024 09:53:55 +0100 Subject: [PATCH 0274/1405] bcachefs: Fix build on parisc by avoiding __multi3() The gcc compiler on paric does support the __int128 type, although the architecture does not have native 128-bit support. The effect is, that the bcachefs u128_square() function will pull in the libgcc __multi3() helper, which breaks the kernel build when bcachefs is built as module since this function isn't currently exported in arch/parisc/kernel/parisc_ksyms.c. The build failure can be seen in the latest debian kernel build at: https://buildd.debian.org/status/fetch.php?pkg=linux&arch=hppa&ver=6.7.1-1%7Eexp1&stamp=1706132569&raw=0 We prefer to not export that symbol, so fall back to the optional 64-bit implementation provided by bcachefs and thus avoid usage of __multi3(). Signed-off-by: Helge Deller Cc: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/mean_and_variance.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/mean_and_variance.h b/fs/bcachefs/mean_and_variance.h index b2be565bb8f21..64df11ab422bf 100644 --- a/fs/bcachefs/mean_and_variance.h +++ b/fs/bcachefs/mean_and_variance.h @@ -17,7 +17,7 @@ * Rust and rustc has issues with u128. */ -#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) +#if defined(__SIZEOF_INT128__) && defined(__KERNEL__) && !defined(CONFIG_PARISC) typedef struct { unsigned __int128 v; -- GitLab From 6bb3f7f4c3f4da8e09de188f2f63e8f741bba3bd Mon Sep 17 00:00:00 2001 From: Guoyu Ou Date: Sun, 28 Jan 2024 16:46:17 +0800 Subject: [PATCH 0275/1405] bcachefs: unlock parent dir if entry is not found in subvolume deletion Parent dir is locked by user_path_locked_at() before validating the required dentry. It should be unlocked if we can not perform the deletion. This fixes the problem: $ bcachefs subvolume delete not-exist-entry BCH_IOCTL_SUBVOLUME_DESTROY ioctl error: No such file or directory $ bcachefs subvolume delete not-exist-entry the second will stuck because the parent dir is locked in the previous deletion. Signed-off-by: Guoyu Ou Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 3a4c24c28e7fa..3dc8630ff9fe1 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -455,6 +455,7 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, if (IS_ERR(victim)) return PTR_ERR(victim); + dir = d_inode(path.dentry); if (victim->d_sb->s_fs_info != c) { ret = -EXDEV; goto err; @@ -463,14 +464,13 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, ret = -ENOENT; goto err; } - dir = d_inode(path.dentry); ret = __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); d_delete(victim); } - inode_unlock(dir); err: + inode_unlock(dir); dput(victim); path_put(&path); return ret; -- GitLab From c79f52f0656eeb3e4a12f7f358f760077ae111b6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Jan 2024 13:44:58 -0700 Subject: [PATCH 0276/1405] io_uring/rw: ensure poll based multishot read retries appropriately io_read_mshot() always relies on poll triggering retries, and this works fine as long as we do a retry per size of the buffer being read. The buffer size is given by the size of the buffer(s) in the given buffer group ID. But if we're reading less than what is available, then we don't always get to read everything that is available. For example, if the buffers available are 32 bytes and we have 64 bytes to read, then we'll correctly read the first 32 bytes and then wait for another poll trigger before we attempt the next read. This next poll trigger may never happen, in which case we just sit forever and never make progress, or it may trigger at some point in the future, and now we're just delivering the available data much later than we should have. io_read_mshot() could do retries itself, but that is wasteful as we'll be going through all of __io_read() again, and most likely in vain. Rather than do that, bump our poll reference count and have io_poll_check_events() do one more loop and check with vfs_poll() if we have more data to read. If we do, io_read_mshot() will get invoked again directly and we'll read the next chunk. io_poll_multishot_retry() must only get called from inside io_poll_issue(), which is our multishot retry handler, as we know we already "own" the request at this point. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/1041 Fixes: fc68fcda0491 ("io_uring/rw: add support for IORING_OP_READ_MULTISHOT") Signed-off-by: Jens Axboe --- io_uring/poll.h | 9 +++++++++ io_uring/rw.c | 10 +++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.h b/io_uring/poll.h index ff4d5d753387e..1dacae9e816c9 100644 --- a/io_uring/poll.h +++ b/io_uring/poll.h @@ -24,6 +24,15 @@ struct async_poll { struct io_poll *double_poll; }; +/* + * Must only be called inside issue_flags & IO_URING_F_MULTISHOT, or + * potentially other cases where we already "own" this poll request. + */ +static inline void io_poll_multishot_retry(struct io_kiocb *req) +{ + atomic_inc(&req->poll_refs); +} + int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_poll_add(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/rw.c b/io_uring/rw.c index 118cc9f1cf160..d5e79d9bdc717 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -18,6 +18,7 @@ #include "opdef.h" #include "kbuf.h" #include "rsrc.h" +#include "poll.h" #include "rw.h" struct io_rw { @@ -962,8 +963,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, ret, cflags | IORING_CQE_F_MORE)) { - if (issue_flags & IO_URING_F_MULTISHOT) + if (issue_flags & IO_URING_F_MULTISHOT) { + /* + * Force retry, as we might have more data to + * be read and otherwise it won't get retried + * until (if ever) another poll is triggered. + */ + io_poll_multishot_retry(req); return IOU_ISSUE_SKIP_COMPLETE; + } return -EAGAIN; } } -- GitLab From d6d33f03baa43d763fe094ca926eeae7d3421d07 Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Tue, 26 Dec 2023 16:51:41 +0800 Subject: [PATCH 0277/1405] fs/ntfs3: use non-movable memory for ntfs3 MFT buffer cache Since the buffer cache for ntfs3 metadata is not released until the file system is unmounted, allocating from the movable zone may result in cma allocation failures. This is due to the page still being used by ntfs3, leading to migration failures. To address this, this commit use sb_bread_umovable() instead of sb_bread(). This change prevents allocation from the movable zone, ensuring compatibility with scenarios where the buffer head is not released until unmount. This patch is inspired by commit a8ac900b8163("ext4: use non-movable memory for the ext4 superblock"). The issue is found when playing video files stored in NTFS on the Android TV platform. During this process, the media parser reads the video file, causing ntfs3 to allocate buffer cache from the CMA area. Subsequently, the hardware decoder attempts to allocate memory from the same CMA area. However, the page is still in use by ntfs3, resulting in a migrate failure in alloc_contig_range(). The pinned page and allocating stacktrace reported by page owner shows below: page:ffffffff00b68880 refcount:3 mapcount:0 mapping:ffffff80046aa828 index:0xc0040 pfn:0x20fa4 aops:def_blk_aops ino:0 flags: 0x2020(active|private) page dumped because: migration failure page last allocated via order 0, migratetype Movable, gfp_mask 0x108c48 (GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE), page_owner tracks the page as allocated prep_new_page get_page_from_freelist __alloc_pages_nodemask pagecache_get_page __getblk_gfp __bread_gfp ntfs_read_run_nb ntfs_read_bh mi_read ntfs_iget5 dir_search_u ntfs_lookup __lookup_slow lookup_slow walk_component path_lookupat Signed-off-by: Ism Hong Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index abbc7182554aa..2b54ae94440fb 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1035,7 +1035,7 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) static inline struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { - struct buffer_head *bh = sb_bread(sb, block); + struct buffer_head *bh = sb_bread_unmovable(sb, block); if (bh) return bh; -- GitLab From 5ca87d01eba7bdfe9536a157ca33c1455bb8d16c Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:03:21 +0300 Subject: [PATCH 0278/1405] fs/ntfs3: Prevent generic message "attempt to access beyond end of device" It used in test environment. Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 24 ++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 14 +------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 321978019407f..ae2ef5c11868c 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1007,6 +1007,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); } +/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 2b54ae94440fb..81f7563428ee0 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -586,6 +586,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized); /* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1032,19 +1033,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; } -static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread_unmovable(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode); -- GitLab From 4fd6c08a16d7f1ba10212c9ef7bc73218144b463 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:12:38 +0300 Subject: [PATCH 0279/1405] fs/ntfs3: Use i_size_read and i_size_write Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 4 ++-- fs/ntfs3/dir.c | 2 +- fs/ntfs3/file.c | 11 ++++++----- fs/ntfs3/frecord.c | 10 +++++----- fs/ntfs3/index.c | 8 ++++---- fs/ntfs3/inode.c | 2 +- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 646e2dad1b757..7aadf50109994 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -2084,7 +2084,7 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) /* Update inode size. */ ni->i_valid = valid_size; - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); inode_set_bytes(&ni->vfs_inode, total_size); ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mark_inode_dirty(&ni->vfs_inode); @@ -2499,7 +2499,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) mi_b->dirty = true; done: - ni->vfs_inode.i_size += bytes; + i_size_write(&ni->vfs_inode, ni->vfs_inode.i_size + bytes); ni->ni_flags |= NI_FLAG_UPDATE_PARENT; mark_inode_dirty(&ni->vfs_inode); diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 9f6dd445eb04d..effa6accf8a85 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -517,7 +517,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, u32 e_size, off, end; size_t drs = 0, fles = 0, bit = 0; struct indx_node *node = NULL; - size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits; + size_t max_indx = i_size_read(&ni->vfs_inode) >> ni->dir.index_bits; if (is_empty) *is_empty = true; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 07ed3d946e7c5..b702543a87953 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -646,7 +646,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (err) goto out; } else if (new_size > i_size) { - inode->i_size = new_size; + i_size_write(inode, new_size); } } @@ -696,7 +696,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out; } inode_dio_wait(inode); - oldsize = inode->i_size; + oldsize = i_size_read(inode); newsize = attr->ia_size; if (newsize <= oldsize) @@ -708,7 +708,7 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out; ni->ni_flags |= NI_FLAG_UPDATE_PARENT; - inode->i_size = newsize; + i_size_write(inode, newsize); } setattr_copy(idmap, inode, attr); @@ -847,7 +847,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); loff_t pos = iocb->ki_pos; struct inode *inode = file_inode(file); - loff_t i_size = inode->i_size; + loff_t i_size = i_size_read(inode); struct address_space *mapping = inode->i_mapping; struct ntfs_inode *ni = ntfs_i(inode); u64 valid = ni->i_valid; @@ -1177,7 +1177,8 @@ static int ntfs_file_release(struct inode *inode, struct file *file) down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - inode->i_size, &ni->i_valid, false, NULL); + i_size_read(inode), &ni->i_valid, false, + NULL); up_write(&ni->file.run_lock); ni_unlock(ni); diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 2636ab7640ace..3b42938a9d3b2 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2099,7 +2099,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) gfp_t gfp_mask; struct page *pg; - if (vbo >= ni->vfs_inode.i_size) { + if (vbo >= i_size_read(&ni->vfs_inode)) { SetPageUptodate(page); err = 0; goto out; @@ -2173,7 +2173,7 @@ int ni_decompress_file(struct ntfs_inode *ni) { struct ntfs_sb_info *sbi = ni->mi.sbi; struct inode *inode = &ni->vfs_inode; - loff_t i_size = inode->i_size; + loff_t i_size = i_size_read(inode); struct address_space *mapping = inode->i_mapping; gfp_t gfp_mask = mapping_gfp_mask(mapping); struct page **pages = NULL; @@ -2457,6 +2457,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct ATTR_LIST_ENTRY *le = NULL; struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; + loff_t i_size = i_size_read(&ni->vfs_inode); u64 vbo_disk; size_t unc_size; u32 frame_size, i, npages_disk, ondisk_size; @@ -2548,7 +2549,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, } } - frames = (ni->vfs_inode.i_size - 1) >> frame_bits; + frames = (i_size - 1) >> frame_bits; err = attr_wof_frame_info(ni, attr, run, frame64, frames, frame_bits, &ondisk_size, &vbo_data); @@ -2556,8 +2557,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, goto out2; if (frame64 == frames) { - unc_size = 1 + ((ni->vfs_inode.i_size - 1) & - (frame_size - 1)); + unc_size = 1 + ((i_size - 1) & (frame_size - 1)); ondisk_size = attr_size(attr) - vbo_data; } else { unc_size = frame_size; diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index cf92b2433f7a7..daabaad63aaf6 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1462,7 +1462,7 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out2; if (in->name == I30_NAME) { - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); inode_set_bytes(&ni->vfs_inode, alloc_size); } @@ -1544,7 +1544,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, } if (in->name == I30_NAME) - ni->vfs_inode.i_size = data_size; + i_size_write(&ni->vfs_inode, data_size); *vbn = bit << indx->idx2vbn_bits; @@ -2090,7 +2090,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, return err; if (in->name == I30_NAME) - ni->vfs_inode.i_size = new_data; + i_size_write(&ni->vfs_inode, new_data); bpb = bitmap_size(bit); if (bpb * 8 == nbits) @@ -2576,7 +2576,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, 0, NULL, false, NULL); if (in->name == I30_NAME) - ni->vfs_inode.i_size = 0; + i_size_write(&ni->vfs_inode, 0); err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, false, NULL); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 85452a6b1d40a..eb7a8c9fba018 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -985,7 +985,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, } if (pos + err > inode->i_size) { - inode->i_size = pos + err; + i_size_write(inode, pos + err); dirty = true; } -- GitLab From 1b7dd28e14c4728ae1a815605ca33ffb4ce1b309 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:13:59 +0300 Subject: [PATCH 0280/1405] fs/ntfs3: Correct function is_rst_area_valid Reported-by: Robert Morris Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 7dbb000fc6911..855519713bf79 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size); @@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size); - file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; } + if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8)) -- GitLab From 652cfeb43d6b9aba5c7c4902bed7a7340df131fb Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 26 Jan 2024 11:14:31 +0300 Subject: [PATCH 0281/1405] fs/ntfs3: Fixed overflow check in mi_enum_attr() Reported-by: Robert Morris Signed-off-by: Konstantin Komarov --- fs/ntfs3/record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 7b6423584eaee..6aa3a9d44df1b 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -279,7 +279,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (t16 > asize) return NULL; - if (t16 + le32_to_cpu(attr->res.data_size) > asize) + if (le32_to_cpu(attr->res.data_size) > asize - t16) return NULL; t32 = sizeof(short) * attr->name_len; -- GitLab From d68968440b1a75dee05cfac7f368f1aa139e1911 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 29 Jan 2024 10:30:09 +0300 Subject: [PATCH 0282/1405] fs/ntfs3: Update inode->i_size after success write into compressed file Reported-by: Giovanni Santini Signed-off-by: Konstantin Komarov --- fs/ntfs3/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index b702543a87953..691b0c9b95ae7 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1054,6 +1054,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos); return written; } -- GitLab From ec4d82f855ce332de26fe080892483de98cc1a19 Mon Sep 17 00:00:00 2001 From: Mohammad Rahimi Date: Sat, 27 Jan 2024 11:26:28 +0800 Subject: [PATCH 0283/1405] thunderbolt: Fix setting the CNS bit in ROUTER_CS_5 The bit 23, CM TBT3 Not Supported (CNS), in ROUTER_CS_5 indicates whether a USB4 Connection Manager is TBT3-Compatible and should be: 0b for TBT3-Compatible 1b for Not TBT3-Compatible Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4") Cc: stable@vger.kernel.org Signed-off-by: Mohammad Rahimi Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb_regs.h | 2 +- drivers/thunderbolt/usb4.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h index 87e4795275fe6..6f798f6a2b848 100644 --- a/drivers/thunderbolt/tb_regs.h +++ b/drivers/thunderbolt/tb_regs.h @@ -203,7 +203,7 @@ struct tb_regs_switch_header { #define ROUTER_CS_5_WOP BIT(1) #define ROUTER_CS_5_WOU BIT(2) #define ROUTER_CS_5_WOD BIT(3) -#define ROUTER_CS_5_C3S BIT(23) +#define ROUTER_CS_5_CNS BIT(23) #define ROUTER_CS_5_PTO BIT(24) #define ROUTER_CS_5_UTO BIT(25) #define ROUTER_CS_5_HCO BIT(26) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index f8f0d24ff6e46..1515eff8cc3e2 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch *sw) } /* TBT3 supported by the CM */ - val |= ROUTER_CS_5_C3S; + val &= ~ROUTER_CS_5_CNS; return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1); } -- GitLab From efb56d84dd9c3de3c99fc396abb57c6d330038b5 Mon Sep 17 00:00:00 2001 From: David Senoner Date: Fri, 26 Jan 2024 16:56:26 +0100 Subject: [PATCH 0284/1405] ALSA: hda/realtek: Fix the external mic not being recognised for Acer Swift 1 SF114-32 If you connect an external headset/microphone to the 3.5mm jack on the Acer Swift 1 SF114-32 it does not recognize the microphone. This fixes that and gives the user the ability to choose between internal and headset mic. Signed-off-by: David Senoner Cc: Link: https://lore.kernel.org/r/20240126155626.2304465-1-seda18@rolmail.net Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bfefe9ccbc6be..ef51e51fb6a51 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9655,6 +9655,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x126a, "Acer Swift SF114-32", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), -- GitLab From f0d78972f27dc1d1d51fbace2713ad3cdc60a877 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Sun, 28 Jan 2024 16:57:04 +0100 Subject: [PATCH 0285/1405] ALSA: hda/realtek: Enable Mute LED on HP Laptop 14-fq0xxx This HP Laptop uses ALC236 codec with COEF 0x07 controlling the mute LED. Enable existing quirk for this device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20240128155704.2333812-1-l.guzenko@web.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ef51e51fb6a51..bfa244028b994 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9856,6 +9856,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x87b7, "HP Laptop 14-fq0xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), -- GitLab From c0787fcff88bce36d21e5b726bdeab694baba6a5 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Sun, 28 Jan 2024 16:23:38 +0300 Subject: [PATCH 0286/1405] Revert "ALSA: usb-audio: Skip setting clock selector for single connections" This reverts commit 67794f882adca00d043899ac248bc002751da9f6. We need to explicitly set up the clock selector to workaround a problem with the Behringer mixers. This was originally done in d2e8f641257d ("ALSA: usb-audio: Explicitly set up the clock selector") The problem with MOTU M Series mentioned in commit message was fixed in a different way by checking control capabilities of clock selectors. Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240128132338.819273-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 7b259641adb51..a8204c6d6fac1 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -344,7 +344,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, if (ret > 0) { /* Skip setting clock selector again for some devices */ if (chip->quirk_flags & QUIRK_FLAG_SKIP_CLOCK_SELECTOR || - pins == 1 || !writeable) + !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); if (err < 0) -- GitLab From 82ef1a5356572219f41f9123ca047259a77bd67b Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Wed, 24 Jan 2024 11:26:44 +0100 Subject: [PATCH 0287/1405] xfs: reset XFS_ATTR_INCOMPLETE filter on node removal In XFS_DAS_NODE_REMOVE_ATTR case, xfs_attr_mode_remove_attr() sets filter to XFS_ATTR_INCOMPLETE. The filter is then reset in xfs_attr_complete_op() if XFS_DA_OP_REPLACE operation is performed. The filter is not reset though if XFS just removes the attribute (args->value == NULL) with xfs_attr_defer_remove(). attr code goes to XFS_DAS_DONE state. Fix this by always resetting XFS_ATTR_INCOMPLETE filter. The replace operation already resets this filter in anyway and others are completed at this step hence don't need it. Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") Signed-off-by: Andrey Albershteyn Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_attr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 9976a00a73f99..e965a48e7db96 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -421,10 +421,10 @@ xfs_attr_complete_op( bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; args->op_flags &= ~XFS_DA_OP_REPLACE; - if (do_replace) { - args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + if (do_replace) return replace_state; - } + return XFS_DAS_DONE; } -- GitLab From 9c64e749cebd9c2d3d55261530a98bcccb83b950 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 23 Jan 2024 19:14:14 +0100 Subject: [PATCH 0288/1405] drm/virtio: Set segment size for virtio_gpu device Set the segment size of the virtio_gpu device to the value used by the drm helpers when allocating sg lists to fix the following complaint from DMA_API debug code: DMA-API: virtio-pci 0000:07:00.0: mapping sg segment longer than device claims to support [len=262144] [max=65536] Cc: stable@vger.kernel.org Tested-by: Zhenyu Zhang Acked-by: Vivek Kasireddy Signed-off-by: Sebastian Ott Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/7258a4cc-da16-5c34-a042-2a23ee396d56@redhat.com --- drivers/gpu/drm/virtio/virtgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index f8e9abe647b92..9539aa28937fa 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -94,6 +94,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev) goto err_free; } + dma_set_max_seg_size(dev->dev, dma_max_mapping_size(dev->dev) ?: UINT_MAX); ret = virtio_gpu_init(vdev, dev); if (ret) goto err_free; -- GitLab From ebd4acc0cbeae9efea15993b11b05bd32942f3f0 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 23 Jan 2024 14:27:30 +0100 Subject: [PATCH 0289/1405] riscv: Fix wrong size passed to local_flush_tlb_range_asid() local_flush_tlb_range_asid() takes the size as argument, not the end of the range to flush, so fix this by computing the size from the end and the start of the range. Fixes: 7a92fc8b4d20 ("mm: Introduce flush_cache_vmap_early()") Signed-off-by: Alexandre Ghiti Signed-off-by: Dennis Zhou --- arch/riscv/mm/tlbflush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 8d12b26f5ac37..9619965f65018 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -68,7 +68,7 @@ static inline void local_flush_tlb_range_asid(unsigned long start, void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); } static void __ipi_flush_tlb_all(void *info) -- GitLab From b2dd7b953c25ffd5912dda17e980e7168bebcf6c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Oct 2023 17:04:39 +0300 Subject: [PATCH 0290/1405] fs/ntfs3: Fix an NULL dereference bug The issue here is when this is called from ntfs_load_attr_list(). The "size" comes from le32_to_cpu(attr->res.data_size) so it can't overflow on a 64bit systems but on 32bit systems the "+ 1023" can overflow and the result is zero. This means that the kmalloc will succeed by returning the ZERO_SIZE_PTR and then the memcpy() will crash with an Oops on the next line. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Signed-off-by: Dan Carpenter Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 81f7563428ee0..627419bd6f778 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -475,7 +475,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, int al_update(struct ntfs_inode *ni, int sync); static inline size_t al_aligned(size_t size) { - return (size + 1023) & ~(size_t)1023; + return size_add(size, 1023) & ~(size_t)1023; } /* Globals from bitfunc.c */ -- GitLab From 731ab1f9828800df871c5a7ab9ffe965317d3f15 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Sat, 30 Dec 2023 17:00:03 +0800 Subject: [PATCH 0291/1405] fs/ntfs3: Fix oob in ntfs_listxattr The length of name cannot exceed the space occupied by ea. Reported-and-tested-by: syzbot+65e940cfb8f99a97aca7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: Konstantin Komarov --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 071356d096d83..53e7d1fa036aa 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -219,6 +219,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break; + if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size) -- GitLab From 1f5fa4b3b85ceb43f1053290f0ade037b50e6297 Mon Sep 17 00:00:00 2001 From: Nekun Date: Mon, 30 Oct 2023 08:33:54 +0000 Subject: [PATCH 0292/1405] fs/ntfs3: Add ioctl operation for directories (FITRIM) While ntfs3 supports discards, FITRIM ioctl() command has defined only for regular files. This may confuse users trying to invoke `fstrim` utility with the directory argument (for example, call `fstrim ` which is the common practice). In this case, ioctl() returns -ENOTTY without any error messages in kernel ring buffer, this may be easily interpreted as no support for discards in ntfs3 driver. Currently only FITRIM command implemented in ntfs_ioctl() and passed inode used only for dereferencing NTFS superblock, so no need for separate ioctl() handler for directories, just add existing ntfs_ioctl() handler to ntfs_dir_operations. Signed-off-by: Nekun Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 4 ++++ fs/ntfs3/file.c | 4 ++-- fs/ntfs3/ntfs_fs.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index effa6accf8a85..5cf3d9decf646 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -611,5 +611,9 @@ const struct file_operations ntfs_dir_operations = { .iterate_shared = ntfs_readdir, .fsync = generic_file_fsync, .open = ntfs_file_open, + .unlocked_ioctl = ntfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ntfs_compat_ioctl, +#endif }; // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 691b0c9b95ae7..5418662c80d88 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -48,7 +48,7 @@ static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) return 0; } -static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) +long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; @@ -61,7 +61,7 @@ static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) } #ifdef CONFIG_COMPAT -static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) +long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) { return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 627419bd6f778..79356fd29a141 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -502,6 +502,8 @@ int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg); +long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; extern const struct inode_operations ntfs_file_inode_operations; extern const struct file_operations ntfs_file_operations; -- GitLab From 622cd3daa8eae37359a6fd3c07c36d19f66606b5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 10 Nov 2023 20:59:22 +0100 Subject: [PATCH 0293/1405] fs/ntfs3: Slightly simplify ntfs_inode_printk() The size passed to snprintf() includes the space for the trailing space. So there is no reason here not to use all the available space. So remove the -1 when computing 'name_len'. While at it, use the size of the array directly instead of the intermediate 'name_len' variable. snprintf() also guaranties that the buffer if NULL terminated, so there is no need to write an additional trailing NULL "To be sure". Signed-off-by: Christophe JAILLET Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index c55a29793a8d8..cef5467fd9283 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -122,13 +122,12 @@ void ntfs_inode_printk(struct inode *inode, const char *fmt, ...) if (name) { struct dentry *de = d_find_alias(inode); - const u32 name_len = ARRAY_SIZE(s_name_buf) - 1; if (de) { spin_lock(&de->d_lock); - snprintf(name, name_len, " \"%s\"", de->d_name.name); + snprintf(name, sizeof(s_name_buf), " \"%s\"", + de->d_name.name); spin_unlock(&de->d_lock); - name[name_len] = 0; /* To be sure. */ } else { name[0] = 0; } -- GitLab From 8eed4e00a370b37b4e5985ed983dccedd555ea9d Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 29 Jan 2024 14:38:42 +0800 Subject: [PATCH 0294/1405] x86/lib: Revert to _ASM_EXTABLE_UA() for {get,put}_user() fixups During memory error injection test on kernels >= v6.4, the kernel panics like below. However, this issue couldn't be reproduced on kernels <= v6.3. mce: [Hardware Error]: CPU 296: Machine Check Exception: f Bank 1: bd80000000100134 mce: [Hardware Error]: RIP 10: {__get_user_nocheck_4+0x6/0x20} mce: [Hardware Error]: TSC 411a93533ed ADDR 346a8730040 MISC 86 mce: [Hardware Error]: PROCESSOR 0:a06d0 TIME 1706000767 SOCKET 1 APIC 211 microcode 80001490 mce: [Hardware Error]: Run the above through 'mcelog --ascii' mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel Kernel panic - not syncing: Fatal local machine check The MCA code can recover from an in-kernel #MC if the fixup type is EX_TYPE_UACCESS, explicitly indicating that the kernel is attempting to access userspace memory. However, if the fixup type is EX_TYPE_DEFAULT the only thing that is raised for an in-kernel #MC is a panic. ex_handler_uaccess() would warn if users gave a non-canonical addresses (with bit 63 clear) to {get, put}_user(), which was unexpected. Therefore, commit b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()") replaced _ASM_EXTABLE_UA() with _ASM_EXTABLE() for {get, put}_user() fixups. However, the new fixup type EX_TYPE_DEFAULT results in a panic. Commit 6014bc27561f ("x86-64: make access_ok() independent of LAM") added the check gp_fault_address_ok() right before the WARN_ONCE() in ex_handler_uaccess() to not warn about non-canonical user addresses due to LAM. With that in place, revert back to _ASM_EXTABLE_UA() for {get,put}_user() exception fixups in order to be able to handle in-kernel MCEs correctly again. [ bp: Massage commit message. ] Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()") Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kirill A. Shutemov Cc: Link: https://lore.kernel.org/r/20240129063842.61584-1-qiuxu.zhuo@intel.com --- arch/x86/lib/getuser.S | 24 ++++++++++++------------ arch/x86/lib/putuser.S | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 20ef350a60fbb..10d5ed8b5990f 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception) #endif /* get_user */ - _ASM_EXTABLE(1b, __get_user_handle_exception) - _ASM_EXTABLE(2b, __get_user_handle_exception) - _ASM_EXTABLE(3b, __get_user_handle_exception) + _ASM_EXTABLE_UA(1b, __get_user_handle_exception) + _ASM_EXTABLE_UA(2b, __get_user_handle_exception) + _ASM_EXTABLE_UA(3b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(4b, __get_user_handle_exception) + _ASM_EXTABLE_UA(4b, __get_user_handle_exception) #else - _ASM_EXTABLE(4b, __get_user_8_handle_exception) - _ASM_EXTABLE(5b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception) #endif /* __get_user */ - _ASM_EXTABLE(6b, __get_user_handle_exception) - _ASM_EXTABLE(7b, __get_user_handle_exception) - _ASM_EXTABLE(8b, __get_user_handle_exception) + _ASM_EXTABLE_UA(6b, __get_user_handle_exception) + _ASM_EXTABLE_UA(7b, __get_user_handle_exception) + _ASM_EXTABLE_UA(8b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(9b, __get_user_handle_exception) + _ASM_EXTABLE_UA(9b, __get_user_handle_exception) #else - _ASM_EXTABLE(9b, __get_user_8_handle_exception) - _ASM_EXTABLE(10b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception) #endif diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 2877f59341775..975c9c18263d2 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -133,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception) RET SYM_CODE_END(__put_user_handle_exception) - _ASM_EXTABLE(1b, __put_user_handle_exception) - _ASM_EXTABLE(2b, __put_user_handle_exception) - _ASM_EXTABLE(3b, __put_user_handle_exception) - _ASM_EXTABLE(4b, __put_user_handle_exception) - _ASM_EXTABLE(5b, __put_user_handle_exception) - _ASM_EXTABLE(6b, __put_user_handle_exception) - _ASM_EXTABLE(7b, __put_user_handle_exception) - _ASM_EXTABLE(9b, __put_user_handle_exception) + _ASM_EXTABLE_UA(1b, __put_user_handle_exception) + _ASM_EXTABLE_UA(2b, __put_user_handle_exception) + _ASM_EXTABLE_UA(3b, __put_user_handle_exception) + _ASM_EXTABLE_UA(4b, __put_user_handle_exception) + _ASM_EXTABLE_UA(5b, __put_user_handle_exception) + _ASM_EXTABLE_UA(6b, __put_user_handle_exception) + _ASM_EXTABLE_UA(7b, __put_user_handle_exception) + _ASM_EXTABLE_UA(9b, __put_user_handle_exception) #ifdef CONFIG_X86_32 - _ASM_EXTABLE(8b, __put_user_handle_exception) - _ASM_EXTABLE(10b, __put_user_handle_exception) + _ASM_EXTABLE_UA(8b, __put_user_handle_exception) + _ASM_EXTABLE_UA(10b, __put_user_handle_exception) #endif -- GitLab From 37e8c97e539015637cb920d3e6f1e404f707a06e Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Wed, 24 Jan 2024 02:21:47 -0800 Subject: [PATCH 0295/1405] net: hsr: remove WARN_ONCE() in send_hsr_supervision_frame() Syzkaller reported [1] hitting a warning after failing to allocate resources for skb in hsr_init_skb(). Since a WARN_ONCE() call will not help much in this case, it might be prudent to switch to netdev_warn_once(). At the very least it will suppress syzkaller reports such as [1]. Just in case, use netdev_warn_once() in send_prp_supervision_frame() for similar reasons. [1] HSR: Could not send supervision frame WARNING: CPU: 1 PID: 85 at net/hsr/hsr_device.c:294 send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 RIP: 0010:send_hsr_supervision_frame+0x60a/0x810 net/hsr/hsr_device.c:294 ... Call Trace: hsr_announce+0x114/0x370 net/hsr/hsr_device.c:382 call_timer_fn+0x193/0x590 kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x764/0xb20 kernel/time/timer.c:2022 run_timer_softirq+0x58/0xd0 kernel/time/timer.c:2035 __do_softirq+0x21a/0x8de kernel/softirq.c:553 invoke_softirq kernel/softirq.c:427 [inline] __irq_exit_rcu kernel/softirq.c:632 [inline] irq_exit_rcu+0xb7/0x120 kernel/softirq.c:644 sysvec_apic_timer_interrupt+0x95/0xb0 arch/x86/kernel/apic/apic.c:1076 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:649 ... This issue is also found in older kernels (at least up to 5.10). Cc: stable@vger.kernel.org Reported-by: syzbot+3ae0a3f42c84074b7c8e@syzkaller.appspotmail.com Fixes: 121c33b07b31 ("net: hsr: introduce common code for skb initialization") Signed-off-by: Nikita Zhandarovich Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7ceb9ac6e7309..9d71b66183daf 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } -- GitLab From bfb007aebe6bff451f7f3a4be19f4f286d0d5d9c Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 25 Jan 2024 12:53:09 +0300 Subject: [PATCH 0296/1405] nfc: nci: free rx_data_reassembly skb on NCI device cleanup rx_data_reassembly skb is stored during NCI data exchange for processing fragmented packets. It is dropped only when the last fragment is processed or when an NTF packet with NCI_OP_RF_DEACTIVATE_NTF opcode is received. However, the NCI device may be deallocated before that which leads to skb leak. As by design the rx_data_reassembly skb is bound to the NCI device and nothing prevents the device to be freed before the skb is processed in some way and cleaned, free it on the NCI device cleanup. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 6a2968aaf50c ("NFC: basic NCI protocol implementation") Cc: stable@vger.kernel.org Reported-by: syzbot+6b7c68d9c21e4ee4251b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000f43987060043da7b@google.com/ Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 97348cedb16b3..cdad47b140fa4 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); -- GitLab From 577e4432f3ac810049cb7e6b71f4d96ec7c6e894 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 10:33:17 +0000 Subject: [PATCH 0297/1405] tcp: add sanity checks to rx zerocopy TCP rx zerocopy intent is to map pages initially allocated from NIC drivers, not pages owned by a fs. This patch adds to can_map_frag() these additional checks: - Page must not be a compound one. - page->mapping must be NULL. This fixes the panic reported by ZhangPeng. syzbot was able to loopback packets built with sendfile(), mapping pages owned by an ext4 file to TCP rx zerocopy. r3 = socket$inet_tcp(0x2, 0x1, 0x0) mmap(&(0x7f0000ff9000/0x4000)=nil, 0x4000, 0x0, 0x12, r3, 0x0) r4 = socket$inet_tcp(0x2, 0x1, 0x0) bind$inet(r4, &(0x7f0000000000)={0x2, 0x4e24, @multicast1}, 0x10) connect$inet(r4, &(0x7f00000006c0)={0x2, 0x4e24, @empty}, 0x10) r5 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) fallocate(r5, 0x0, 0x0, 0x85b8) sendfile(r4, r5, 0x0, 0x8ba0) getsockopt$inet_tcp_TCP_ZEROCOPY_RECEIVE(r4, 0x6, 0x23, &(0x7f00000001c0)={&(0x7f0000ffb000/0x3000)=nil, 0x3000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0000000440)=0x40) r6 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Link: https://lore.kernel.org/netdev/5106a58e-04da-372a-b836-9d3d0bd2507b@huawei.com/T/ Reported-and-bisected-by: ZhangPeng Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Matthew Wilcox Cc: linux-mm@vger.kernel.org Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1c6de385ccef..7e2481b9eae1b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, -- GitLab From 2147caaac7349698f2a392c5e2911a6861a09650 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 29 Jan 2024 09:49:18 +0000 Subject: [PATCH 0298/1405] netfs: Fix i_dio_count leak on DIO read past i_size If netfs_begin_read gets a NETFS_DIO_READ request that begins past i_size, it won't perform any i/o and just return 0. This will leak an increment to i_dio_count that is done at the top of the function. This can cause subsequent buffered read requests to block indefinitely, waiting for a non existing dio operation to complete. Add a inode_dio_end() for the NETFS_DIO_READ case, before returning. Signed-off-by: Marc Dionne Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240129094924.1221977-2-dhowells@redhat.com Reviewed-by: Jeff Layton cc: Jeff Layton cc: cc: cc: Signed-off-by: Christian Brauner --- fs/netfs/io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/netfs/io.c b/fs/netfs/io.c index e8ff1e61ce79b..4261ad6c55b66 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -748,6 +748,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) if (!rreq->submitted) { netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + if (rreq->origin == NETFS_DIO_READ) + inode_dio_end(rreq->inode); ret = 0; goto out; } -- GitLab From ca9ca1a5d5a980550db1001ea825f9fdfa550b83 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 29 Jan 2024 09:49:19 +0000 Subject: [PATCH 0299/1405] netfs: Fix missing zero-length check in unbuffered write Fix netfs_unbuffered_write_iter() to return immediately if generic_write_checks() returns 0, indicating there's nothing to write. Note that netfs_file_write_iter() already does this. Also, whilst we're at it, put in checks for the size being zero before we even take the locks. Note that generic_write_checks() can still reduce the size to zero, so we still need that check. Without this, a warning similar to the following is logged to dmesg: netfs: Zero-sized write [R=1b6da] and the syscall fails with EIO, e.g.: /sbin/ldconfig.real: Writing of cache extension data failed: Input/output error This can be reproduced on 9p by: xfs_io -f -c 'pwrite 0 0' /xfstest.test/foo Fixes: 153a9961b551 ("netfs: Implement unbuffered/DIO write support") Reported-by: Eric Van Hensbergen Link: https://lore.kernel.org/r/ZbQUU6QKmIftKsmo@FV7GG9FTHL/ Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240129094924.1221977-3-dhowells@redhat.com Tested-by: Dominique Martinet Reviewed-by: Jeff Layton cc: Dominique Martinet cc: Jeff Layton cc: cc: cc: cc: Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 3 +++ fs/netfs/direct_write.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index a3059b3168fd9..9a0d32e4b422a 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -477,6 +477,9 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + if (!iov_iter_count(from)) + return 0; + if ((iocb->ki_flags & IOCB_DIRECT) || test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)) return netfs_unbuffered_write_iter(iocb, from); diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 60a40d293c87f..bee047e20f5d6 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -139,6 +139,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); + if (!iov_iter_count(from)) + return 0; + trace_netfs_write_iter(iocb, from); netfs_stat(&netfs_n_rh_dio_write); @@ -146,7 +149,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret < 0) return ret; ret = generic_write_checks(iocb, from); - if (ret < 0) + if (ret <= 0) goto out; ret = file_remove_privs(file); if (ret < 0) -- GitLab From c92c108403b09f75f3393588c2326ecad49ee2e2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 19 Jan 2024 03:08:37 -0600 Subject: [PATCH 0300/1405] Revert "drm/amd/pm: fix the high voltage and temperature issue" This reverts commit 5f38ac54e60562323ea4abb1bfb37d043ee23357. This causes issues with rebooting and the 7800XT. Cc: Kenneth Feng Cc: stable@vger.kernel.org Fixes: 5f38ac54e605 ("drm/amd/pm: fix the high voltage and temperature issue") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3062 Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++---------- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 33 ++----------------- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 - .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 8 +---- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 8 +---- 5 files changed, 11 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b158d27d0a71c..31b28e6f35b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - r = psp_gpu_reset(adev); - break; - default: - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - break; - } - + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7ffad3eb0a015..0ad947df777ab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -734,7 +734,7 @@ static int smu_early_init(void *handle) smu->adev = adev; smu->pm_enabled = !!amdgpu_dpm; smu->is_apu = false; - smu->smu_baco.state = SMU_BACO_STATE_NONE; + smu->smu_baco.state = SMU_BACO_STATE_EXIT; smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; @@ -1954,31 +1954,10 @@ static int smu_smc_hw_cleanup(struct smu_context *smu) return 0; } -static int smu_reset_mp1_state(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - if ((!adev->in_runpm) && (!adev->in_suspend) && - (!amdgpu_in_reset(adev))) - switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - break; - default: - break; - } - - return ret; -} - static int smu_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1996,15 +1975,7 @@ static int smu_hw_fini(void *handle) adev->pm.dpm_enabled = false; - ret = smu_smc_hw_cleanup(smu); - if (ret) - return ret; - - ret = smu_reset_mp1_state(smu); - if (ret) - return ret; - - return 0; + return smu_smc_hw_cleanup(smu); } static void smu_late_fini(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 2aa4fea873147..66e84defd0b6e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -424,7 +424,6 @@ enum smu_reset_mode { enum smu_baco_state { SMU_BACO_STATE_ENTER = 0, SMU_BACO_STATE_EXIT, - SMU_BACO_STATE_NONE, }; struct smu_baco_context { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 4fdf34fffa9a5..3230701d0d381 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2748,13 +2748,7 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 7c3e162e2d818..0ffdb58af74e6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2505,13 +2505,7 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_PrepareMp1ForUnload, - 0x55, NULL); - - if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT) - ret = smu_v13_0_disable_pmfw_state(smu); - + ret = smu_cmn_set_mp1_state(smu, mp1_state); break; default: /* Ignore others */ -- GitLab From e42e29cc442395d62f1a8963ec2dfb700ba6a5d7 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 29 Jan 2024 08:40:23 -0600 Subject: [PATCH 0301/1405] Revert "jfs: fix shift-out-of-bounds in dbJoin" This reverts commit cca974daeb6c43ea971f8ceff5a7080d7d49ee30. The added sanity check is incorrect. BUDMIN is not the wrong value and is too small. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_dmap.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 8eec84c651bfb..cb3cda1390adb 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * RETURN VALUES: none */ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { @@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) * get the buddy size (number of words covered) of * the new value. */ - - if ((newval - tp->dmt_budmin) > BUDMIN) - return -EIO; - budsz = BUDSIZE(newval, tp->dmt_budmin); /* try to join. -- GitLab From 6d3c7fb17b4c047ccd0b42cf1308da693ab45acb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 24 Jan 2024 09:27:27 -0800 Subject: [PATCH 0302/1405] nvme: use ctrl state accessor The ctrl->state value is updated in another thread using WRITE_ONCE, so ensure all the readers use the appropriate accessor. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/apple.c | 13 +++++++------ drivers/nvme/host/auth.c | 2 +- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/fabrics.h | 8 +++++--- drivers/nvme/host/multipath.c | 15 ++++++++------- drivers/nvme/host/nvme.h | 11 ++++++----- drivers/nvme/host/sysfs.c | 6 +++--- drivers/nvme/target/loop.c | 8 +++++--- 8 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 596bb11eeba5a..c727cd1f264bf 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -797,6 +797,7 @@ static int apple_nvme_init_request(struct blk_mq_tag_set *set, static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) { + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); bool dead = false, freeze = false; unsigned long flags; @@ -808,8 +809,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) if (csts & NVME_CSTS_CFS) dead = true; - if (anv->ctrl.state == NVME_CTRL_LIVE || - anv->ctrl.state == NVME_CTRL_RESETTING) { + if (state == NVME_CTRL_LIVE || + state == NVME_CTRL_RESETTING) { freeze = true; nvme_start_freeze(&anv->ctrl); } @@ -881,7 +882,7 @@ static enum blk_eh_timer_return apple_nvme_timeout(struct request *req) unsigned long flags; u32 csts = readl(anv->mmio_nvme + NVME_REG_CSTS); - if (anv->ctrl.state != NVME_CTRL_LIVE) { + if (nvme_ctrl_state(&anv->ctrl) != NVME_CTRL_LIVE) { /* * From rdma.c: * If we are resetting, connecting or deleting we should @@ -985,10 +986,10 @@ static void apple_nvme_reset_work(struct work_struct *work) u32 boot_status, aqa; struct apple_nvme *anv = container_of(work, struct apple_nvme, ctrl.reset_work); + enum nvme_ctrl_state state = nvme_ctrl_state(&anv->ctrl); - if (anv->ctrl.state != NVME_CTRL_RESETTING) { - dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", - anv->ctrl.state); + if (state != NVME_CTRL_RESETTING) { + dev_warn(anv->dev, "ctrl state %d is not RESETTING\n", state); ret = -ENODEV; goto out; } diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 72c0525c75f50..2a12ee878783f 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -897,7 +897,7 @@ static void nvme_ctrl_auth_work(struct work_struct *work) * If the ctrl is no connected, bail as reconnect will handle * authentication. */ - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; /* Authenticate admin queue first */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0810be0d1154c..a42c347d08e87 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -721,7 +721,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, EXPORT_SYMBOL_GPL(nvme_fail_nonready_command); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live) + bool queue_live, enum nvme_ctrl_state state) { struct nvme_request *req = nvme_req(rq); @@ -742,7 +742,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, * command, which is require to set the queue live in the * appropinquate states. */ - switch (nvme_ctrl_state(ctrl)) { + switch (state) { case NVME_CTRL_CONNECTING: if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) && (req->cmd->fabrics.fctype == nvme_fabrics_type_connect || diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index fbaee5a7be196..06cc54851b1be 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -185,9 +185,11 @@ static inline bool nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { - if (ctrl->state == NVME_CTRL_DELETING || - ctrl->state == NVME_CTRL_DELETING_NOIO || - ctrl->state == NVME_CTRL_DEAD || + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (state == NVME_CTRL_DELETING || + state == NVME_CTRL_DELETING_NOIO || + state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || !uuid_equal(&opts->host->id, &ctrl->opts->host->id)) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2dd4137a08b28..74de1e64aeead 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -156,7 +156,7 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) if (!ns->head->disk) continue; kblockd_schedule_work(&ns->head->requeue_work); - if (ctrl->state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) disk_uevent(ns->head->disk, KOBJ_CHANGE); } up_read(&ctrl->namespaces_rwsem); @@ -223,13 +223,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) static bool nvme_path_is_disabled(struct nvme_ns *ns) { + enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl); + /* * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should * still be able to complete assuming that the controller is connected. * Otherwise it will fail immediately and return to the requeue list. */ - if (ns->ctrl->state != NVME_CTRL_LIVE && - ns->ctrl->state != NVME_CTRL_DELETING) + if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING) return true; if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) || !test_bit(NVME_NS_READY, &ns->flags)) @@ -331,7 +332,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, static inline bool nvme_path_is_optimized(struct nvme_ns *ns) { - return ns->ctrl->state == NVME_CTRL_LIVE && + return nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE && ns->ana_state == NVME_ANA_OPTIMIZED; } @@ -358,7 +359,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) list_for_each_entry_rcu(ns, &head->list, siblings) { if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) continue; - switch (ns->ctrl->state) { + switch (nvme_ctrl_state(ns->ctrl)) { case NVME_CTRL_LIVE: case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: @@ -667,7 +668,7 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, * controller is ready. */ if (nvme_state_is_live(ns->ana_state) && - ns->ctrl->state == NVME_CTRL_LIVE) + nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -748,7 +749,7 @@ static void nvme_ana_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) return; nvme_read_ana_log(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 030c808182406..1700063bc24de 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -805,17 +805,18 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *req); bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, - bool queue_live); + bool queue_live, enum nvme_ctrl_state state); static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, bool queue_live) { - if (likely(ctrl->state == NVME_CTRL_LIVE)) + enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); + + if (likely(state == NVME_CTRL_LIVE)) return true; - if (ctrl->ops->flags & NVME_F_FABRICS && - ctrl->state == NVME_CTRL_DELETING) + if (ctrl->ops->flags & NVME_F_FABRICS && state == NVME_CTRL_DELETING) return queue_live; - return __nvme_check_ready(ctrl, rq, queue_live); + return __nvme_check_ready(ctrl, rq, queue_live, state); } /* diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 754e911110420..6b2f06f190de3 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -311,6 +311,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, char *buf) { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + unsigned state = (unsigned)nvme_ctrl_state(ctrl); static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", @@ -321,9 +322,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, [NVME_CTRL_DEAD] = "dead", }; - if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && - state_name[ctrl->state]) - return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); + if (state < ARRAY_SIZE(state_name) && state_name[state]) + return sysfs_emit(buf, "%s\n", state_name[state]); return sysfs_emit(buf, "unknown state\n"); } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 2b2e0d00af85e..e589915ddef85 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -400,7 +400,7 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) } nvme_quiesce_admin_queue(&ctrl->ctrl); - if (ctrl->ctrl.state == NVME_CTRL_LIVE) + if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE) nvme_disable_ctrl(&ctrl->ctrl, true); nvme_cancel_admin_tagset(&ctrl->ctrl); @@ -434,8 +434,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_loop_shutdown_ctrl(ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - if (ctrl->ctrl.state != NVME_CTRL_DELETING && - ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO) + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + + if (state != NVME_CTRL_DELETING && + state != NVME_CTRL_DELETING_NOIO) /* state change failure for non-deleted ctrl? */ WARN_ON_ONCE(1); return; -- GitLab From 346f59d1e8ed0eed41c80e1acb657e484c308e6a Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Mon, 29 Jan 2024 15:12:54 +0300 Subject: [PATCH 0303/1405] ALSA: usb-audio: Check presence of valid altsetting control Many devices with a single alternate setting do not have a Valid Alternate Setting Control and validation performed by validate_sample_rate_table_v2v3() doesn't work on them and is not really needed. So check the presense of control before sending altsetting validation requests. MOTU Microbook IIc is suffering the most without this check. It takes up to 40 seconds to bootup due to how slow it switches sampling rates: [ 2659.164824] usb 3-2: New USB device found, idVendor=07fd, idProduct=0004, bcdDevice= 0.60 [ 2659.164827] usb 3-2: New USB device strings: Mfr=1, Product=2, SerialNumber=0 [ 2659.164829] usb 3-2: Product: MicroBook IIc [ 2659.164830] usb 3-2: Manufacturer: MOTU [ 2659.166204] usb 3-2: Found last interface = 3 [ 2679.322298] usb 3-2: No valid sample rate available for 1:1, assuming a firmware bug [ 2679.322306] usb 3-2: 1:1: add audio endpoint 0x3 [ 2679.322321] usb 3-2: Creating new data endpoint #3 [ 2679.322552] usb 3-2: 1:1 Set sample rate 96000, clock 1 [ 2684.362250] usb 3-2: 2:1: cannot get freq (v2/v3): err -110 [ 2694.444700] usb 3-2: No valid sample rate available for 2:1, assuming a firmware bug [ 2694.444707] usb 3-2: 2:1: add audio endpoint 0x84 [ 2694.444721] usb 3-2: Creating new data endpoint #84 [ 2699.482103] usb 3-2: 2:1 Set sample rate 96000, clock 1 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240129121254.3454481-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/format.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index ab5fed9f55b60..3b45d0ee76938 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -470,9 +470,11 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, int clock) { struct usb_device *dev = chip->dev; + struct usb_host_interface *alts; unsigned int *table; unsigned int nr_rates; int i, err; + u32 bmControls; /* performing the rate verification may lead to unexpected USB bus * behavior afterwards by some unknown reason. Do this only for the @@ -481,6 +483,24 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, if (!(chip->quirk_flags & QUIRK_FLAG_VALIDATE_RATES)) return 0; /* don't perform the validation as default */ + alts = snd_usb_get_host_interface(chip, fp->iface, fp->altsetting); + if (!alts) + return 0; + + if (fp->protocol == UAC_VERSION_3) { + struct uac3_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = le32_to_cpu(as->bmControls); + } else { + struct uac2_as_header_descriptor *as = snd_usb_find_csint_desc( + alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); + bmControls = as->bmControls; + } + + if (!uac_v2v3_control_is_readable(bmControls, + UAC2_AS_VAL_ALT_SETTINGS)) + return 0; + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; -- GitLab From 21fdd8dd3726199451fc495f20104356e071a997 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 29 Jan 2024 13:00:07 -0300 Subject: [PATCH 0304/1405] tools headers UAPI: Sync unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers To pick the changes in these csets: d8b0f5465012538c ("wire up syscalls for statmount/listmount") 5f42375904b08890 ("LSM: wireup Linux Security Module syscalls") Used in some architectures to create syscall tables. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Cc: Casey Schaufler Cc: Christian Brauner Cc: Miklos Szeredi Cc: Paul Moore Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbfMuAlUMRO9Hqa6@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 756b013fb8324..75f00965ab158 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) + +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) + +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 462 /* * 32 bit systems traditionally used different -- GitLab From c6dce23ec993f7da7790a9eadb36864ceb60e942 Mon Sep 17 00:00:00 2001 From: Techno Mooney Date: Mon, 29 Jan 2024 15:11:47 +0700 Subject: [PATCH 0305/1405] ASoC: amd: yc: Add DMI quirk for MSI Bravo 15 C7VF The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Techno Mooney Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218402 Cc: stable@vger.kernel.org Signed-off-by: Techno Mooney Signed-off-by: Bagas Sanjaya Link: https://msgid.link/r/20240129081148.1044891-1-bagasdotme@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index d83cb6e4c62ae..23d44a50d8157 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -297,6 +297,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 B7ED"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Micro-Star International Co., Ltd."), + DMI_MATCH(DMI_PRODUCT_NAME, "Bravo 15 C7VF"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From e4ad71e2367f3f7b7409c30df9cdbb34da15e012 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Fri, 26 Jan 2024 11:57:24 +0100 Subject: [PATCH 0306/1405] MAINTAINERS: wifi: brcm80211: cleanup entry There has been some discussion about what is expected from a maintainer and so a cleanup seems to be in order. A dedicated mailing list has been created to discuss brcm80211 specific development issues. Keeping the status as Supported although help in maintaining this driver is welcomed. Cc: brcm80211@lists.linux.dev Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://msgid.link/20240126105724.384063-1-arend.vanspriel@broadcom.com --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d8ca0e10c8d10..25d530ab90d7b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4169,14 +4169,14 @@ F: drivers/firmware/broadcom/tee_bnxt_fw.c F: drivers/net/ethernet/broadcom/bnxt/ F: include/linux/firmware/broadcom/tee_bnxt_fw.h -BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER -M: Arend van Spriel -M: Franky Lin -M: Hante Meuleman +BROADCOM BRCM80211 IEEE802.11 WIRELESS DRIVERS +M: Arend van Spriel L: linux-wireless@vger.kernel.org +L: brcm80211@lists.linux.dev L: brcm80211-dev-list.pdl@broadcom.com S: Supported F: drivers/net/wireless/broadcom/brcm80211/ +F: include/linux/platform_data/brcmfmac.h BROADCOM BRCMSTB GPIO DRIVER M: Doug Berger -- GitLab From 02add85a9eefb5c969b9bb6916f14607ca2faae0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 14:40:41 -0800 Subject: [PATCH 0307/1405] KVM: selftests: Reword the NX hugepage test's skip message to be more helpful Rework the NX hugepage test's skip message regarding the magic token to provide all of the necessary magic, and to very explicitly recommended using the wrapper shell script. Opportunistically remove an overzealous newline; splitting the recommendation message across two lines of ~45 characters makes it much harder to read than running out a single line to 98 characters. Link: https://lore.kernel.org/r/20231129224042.530798-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 83e25bccc139d..17bbb96fc4dfc 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -257,9 +257,9 @@ int main(int argc, char **argv) TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); __TEST_REQUIRE(token == MAGIC_TOKEN, - "This test must be run with the magic token %d.\n" - "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test.", MAGIC_TOKEN); + "This test must be run with the magic token via '-t %d'.\n" + "Running via nx_huge_pages_test.sh, which also handles " + "environment setup, is strongly recommended.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); -- GitLab From 250e138d876838774bca3140d544438898df0489 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 6 Dec 2023 18:02:43 +0100 Subject: [PATCH 0308/1405] KVM: selftests: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20231206170241.82801-8-ajones@ventanamicro.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/demand_paging_test.c | 4 ++-- .../testing/selftests/kvm/dirty_log_perf_test.c | 4 ++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- tools/testing/selftests/kvm/get-reg-list.c | 2 +- tools/testing/selftests/kvm/guest_print_test.c | 8 ++++---- .../selftests/kvm/hardware_disable_test.c | 6 +++--- .../testing/selftests/kvm/kvm_create_max_vcpus.c | 2 +- .../testing/selftests/kvm/kvm_page_table_test.c | 4 ++-- tools/testing/selftests/kvm/lib/elf.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++-------- tools/testing/selftests/kvm/lib/memstress.c | 2 +- .../testing/selftests/kvm/lib/userfaultfd_util.c | 2 +- .../kvm/memslot_modification_stress_test.c | 2 +- tools/testing/selftests/kvm/memslot_perf_test.c | 6 +++--- tools/testing/selftests/kvm/rseq_test.c | 4 ++-- .../selftests/kvm/set_memory_region_test.c | 6 +++--- .../selftests/kvm/system_counter_offset_test.c | 2 +- 17 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 09c116a82a849..bf3609f718544 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -45,10 +45,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory */ ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) != UCALL_SYNC) { TEST_ASSERT(false, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d374dbcf9a535..504f6fe980e8f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -88,9 +88,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu_idx); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6cbecf4997676..babea97b31a43 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -262,7 +262,7 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) "vcpu run failed: errno=%d", err); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); vcpu_handle_sync_stop(); @@ -410,7 +410,7 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) pr_info("vcpu continues now.\n"); } else { TEST_ASSERT(false, "Invalid guest sync status: " - "exit_reason=%s\n", + "exit_reason=%s", exit_reason_str(run->exit_reason)); } } diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 8274ef04301f6..91f05f78e8249 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -152,7 +152,7 @@ static void check_supported(struct vcpu_reg_list *c) continue; __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", + "%s: %s not available, skipping tests", config_name(c), s->name); } } diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 41230b7461902..3502caa3590c6 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -98,7 +98,7 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) int offset = len_str - len_substr; TEST_ASSERT(len_substr <= len_str, - "Expected '%s' to be a substring of '%s'\n", + "Expected '%s' to be a substring of '%s'", assert_msg, expected_assert_msg); TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, @@ -116,7 +116,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); switch (get_ucall(vcpu, &uc)) { @@ -161,11 +161,11 @@ static void test_limits(void) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT, - "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n", + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)", uc.cmd, UCALL_ABORT); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index f5d59b9934f18..decc521fc7603 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -41,7 +41,7 @@ static void *run_vcpu(void *arg) vcpu_run(vcpu); - TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + TEST_ASSERT(false, "%s: exited with reason %d: %s", __func__, run->exit_reason, exit_reason_str(run->exit_reason)); pthread_exit(NULL); @@ -55,7 +55,7 @@ static void *sleeping_thread(void *arg) fd = open("/dev/null", O_RDWR); close(fd); } - TEST_ASSERT(false, "%s: exited\n", __func__); + TEST_ASSERT(false, "%s: exited", __func__); pthread_exit(NULL); } @@ -118,7 +118,7 @@ static void run_test(uint32_t run) for (i = 0; i < VCPU_NUM; ++i) check_join(threads[i], &b); /* Should not be reached */ - TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); + TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run); } void wait_for_child_setup(pid_t pid) diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 31b3cb24b9a75..b9e23265e4b38 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) int r = setrlimit(RLIMIT_NOFILE, &rl); __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", old_rlim_max, nr_fds_wanted); } else { TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index e37dc9c21888f..e0ba97ac1c561 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -204,9 +204,9 @@ static void *vcpu_worker(void *data) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(vcpu->run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu->id); diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 266f3876e10af..f34d926d97359 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "Seek to program segment offset failed,\n" " program header idx: %u errno: %i\n" " offset_rv: 0x%jx\n" - " expected: 0x%jx\n", + " expected: 0x%jx", n1, errno, (intmax_t) offset_rv, (intmax_t) phdr.p_offset); test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e066d584c6561..8754abaff5e91 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -320,7 +320,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, - "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); + "Use vm_create_barebones() for VMs that _never_ have vCPUs"); TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), "nr_vcpus = %d too large for host, max-vcpus = %d", @@ -491,7 +491,7 @@ void kvm_pin_this_task_to_pcpu(uint32_t pcpu) CPU_ZERO(&mask); CPU_SET(pcpu, &mask); r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -499,7 +499,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), - "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + "Not allowed to run on pCPU '%d', check cgroups?", pcpu); return pcpu; } @@ -529,7 +529,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int i, r; cpu_list = strdup(pcpus_string); - TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + TEST_ASSERT(cpu_list, "strdup() allocation failed."); r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_getaffinity() failed"); @@ -538,7 +538,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 1. Get all pcpus for vcpus. */ for (i = 0; i < nr_vcpus; i++) { - TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i); vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); cpu = strtok(NULL, delim); } @@ -1057,7 +1057,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", ret, errno, slot, flags, guest_paddr, (uint64_t) region->region.memory_size, region->region.guest_memfd); @@ -1222,7 +1222,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); - TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n", + TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", punch_hole ? "punch hole" : "allocate", gpa, len, region->region.guest_memfd, mode, fd_offset); } @@ -1265,7 +1265,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_vcpu *vcpu; /* Confirm a vcpu with the specified id doesn't already exist. */ - TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); + TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id); /* Allocate and initialize new vcpu structure. */ vcpu = calloc(1, sizeof(*vcpu)); diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index d05487e5a371d..cf2c739713080 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -192,7 +192,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 - " nr_vcpus: %d wss: %" PRIx64 "]\n", + " nr_vcpus: %d wss: %" PRIx64 "]", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 271f638915812..f4eef6eb2dc2c 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -69,7 +69,7 @@ static void *uffd_handler_thread_fn(void *arg) if (pollfd[1].revents & POLLIN) { r = read(pollfd[1].fd, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); + "Error reading pipefd in UFFD thread"); break; } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 9855c41ca811f..1563619666123 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -45,7 +45,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory until a stop signal is received */ while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) == UCALL_SYNC) continue; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 8698d1ab60d00..579a64f97333b 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -175,11 +175,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); } static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) @@ -336,7 +336,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, - "vm_phy_pages_alloc() failed\n"); + "vm_phy_pages_alloc() failed"); data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index f74e76d03b7e3..28f97fb520441 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) } while (snapshot != atomic_read(&seq_cnt)); TEST_ASSERT(rseq_cpu == cpu, - "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } /* @@ -256,7 +256,7 @@ int main(int argc, char *argv[]) * migrations given the 1us+ delay in the migration task. */ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?\n", i); + "Only performed %d KVM_RUNs, task stalled too much?", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 075b80dbe2370..40337f566eebb 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -98,11 +98,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); /* Wait for the vCPU thread to reenter the guest. */ usleep(100000); @@ -302,7 +302,7 @@ static void test_delete_memory_region(void) if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) TEST_ASSERT(regs.rip >= final_rip_start && regs.rip < final_rip_end, - "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n", + "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx", final_rip_start, final_rip_end, regs.rip); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 7f5b330b6a1b1..513d421a9bff8 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall %ld\n", + TEST_ASSERT(0, "unhandled ucall %ld", get_ucall(vcpu, &uc)); } } -- GitLab From 95be17e4008b592342ec6cfb9264f4b54c21b790 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 6 Dec 2023 18:02:44 +0100 Subject: [PATCH 0309/1405] KVM: selftests: aarch64: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones Acked-by: Zenghui Yu Link: https://lore.kernel.org/r/20231206170241.82801-9-ajones@ventanamicro.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 12 ++++++------ tools/testing/selftests/kvm/aarch64/hypercalls.c | 16 ++++++++-------- .../selftests/kvm/aarch64/page_fault_test.c | 6 +++--- .../testing/selftests/kvm/aarch64/smccc_filter.c | 2 +- .../selftests/kvm/aarch64/vpmu_counter_access.c | 12 ++++++------ .../selftests/kvm/lib/aarch64/processor.c | 2 +- tools/testing/selftests/kvm/lib/aarch64/vgic.c | 4 ++-- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 274b8465b42a5..2cb8dd1f8275f 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -248,7 +248,7 @@ static void *test_vcpu_run(void *arg) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } return NULL; @@ -287,7 +287,7 @@ static int test_migrate_vcpu(unsigned int vcpu_idx) /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, - "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", + "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d", vcpu_idx, new_pcpu, ret); return ret; @@ -326,12 +326,12 @@ static void test_run(struct kvm_vm *vm) pthread_mutex_init(&vcpu_done_map_lock, NULL); vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); - TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n"); + TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap"); for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, (void *)(unsigned long)i); - TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i); + TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i); } /* Spawn a thread to control the vCPU migrations */ @@ -340,7 +340,7 @@ static void test_run(struct kvm_vm *vm) ret = pthread_create(&pt_vcpu_migration, NULL, test_vcpu_migration, NULL); - TEST_ASSERT(!ret, "Failed to create the migration pthread\n"); + TEST_ASSERT(!ret, "Failed to create the migration pthread"); } @@ -384,7 +384,7 @@ static struct kvm_vm *test_vm_create(void) if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset); else - TEST_FAIL("no support for global offset\n"); + TEST_FAIL("no support for global offset"); } for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 31f66ba97228b..27c10e7a7e012 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -175,18 +175,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) /* First 'read' should be an upper limit of the features supported */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", + "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); /* Test a 'write' by disabling all the features of the register map */ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d\n", + "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* * Test enabling a feature that's not supported. @@ -195,7 +195,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) if (reg_info->max_feat_bit < 63) { ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); TEST_ASSERT(ret != 0 && errno == EINVAL, - "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", errno, reg_info->reg); } } @@ -216,7 +216,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* @@ -226,7 +226,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); TEST_ASSERT(ret != 0 && errno == EBUSY, - "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", errno, reg_info->reg); } } @@ -265,7 +265,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) case TEST_STAGE_HVC_IFACE_FALSE_INFO: break; default: - TEST_FAIL("Unknown test stage: %d\n", prev_stage); + TEST_FAIL("Unknown test stage: %d", prev_stage); } } @@ -294,7 +294,7 @@ static void test_run(void) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } } diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 08a5ca5bed56a..53fddad57cbbc 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -414,10 +414,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, if (fd != -1) { ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, paging_size); - TEST_ASSERT(ret == 0, "fallocate failed\n"); + TEST_ASSERT(ret == 0, "fallocate failed"); } else { ret = madvise(hva, paging_size, MADV_DONTNEED); - TEST_ASSERT(ret == 0, "madvise failed\n"); + TEST_ASSERT(ret == 0, "madvise failed"); } return true; @@ -501,7 +501,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) void fail_vcpu_run_no_handler(int ret) { - TEST_FAIL("Unexpected vcpu run failure\n"); + TEST_FAIL("Unexpected vcpu run failure"); } void fail_vcpu_run_mmio_no_syndrome_handler(int ret) diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c index f4ceae9c89257..2d189f3da228c 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -178,7 +178,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu) struct ucall uc; if (get_ucall(vcpu, &uc) != UCALL_SYNC) - TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, "Unexpected SMCCC return code: %lu", uc.args[1]); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index 9d51b56913496..5f9713364693b 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -517,11 +517,11 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n", + "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", pmcr, pmcr_n); else TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)\n", + "Failed to update PMCR.N to %lu (received: %lu)", pmcr_n, get_pmcr_n(pmcr)); } @@ -594,12 +594,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); /* @@ -611,12 +611,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); } diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 41c776b642c0c..43b9a72833602 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -398,7 +398,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b5f28d21a9477..184378d593e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -38,7 +38,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, struct list_head *iter; unsigned int nr_gic_pages, nr_vcpus_created = 0; - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n"); + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); /* * Make sure that the caller is infact calling this @@ -47,7 +47,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, list_for_each(iter, &vm->vcpus) nr_vcpus_created++; TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n", + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", nr_vcpus, nr_vcpus_created); /* Distributor setup */ -- GitLab From 93e43e50b80b4f342251fb48f8ebced4d3c34983 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 6 Dec 2023 18:02:45 +0100 Subject: [PATCH 0310/1405] KVM: selftests: riscv: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones Acked-by: Anup Patel Link: https://lore.kernel.org/r/20231206170241.82801-10-ajones@ventanamicro.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- tools/testing/selftests/kvm/riscv/get-reg-list.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 7ca736fb41940..2bb33a8ac03c2 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -327,7 +327,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6652108816db4..50bb9bb73a7b6 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -150,7 +150,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(vcpu_has_ext(vcpu, feature), - "%s not available, skipping tests\n", s->name); + "%s not available, skipping tests", s->name); } } -- GitLab From a38125f188c141cd1297d14af84c28b5276ab287 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 6 Dec 2023 18:02:46 +0100 Subject: [PATCH 0311/1405] KVM: selftests: s390x: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones Acked-by: Janosch Frank Acked-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20231206170241.82801-11-ajones@ventanamicro.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/lib/s390x/processor.c | 2 +- tools/testing/selftests/kvm/s390x/resets.c | 4 ++-- .../selftests/kvm/s390x/sync_regs_test.c | 20 +++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 15945121daf17..f6d227892cbcf 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -198,7 +198,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index e41e2cb8ffa97..357943f2bea87 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu) * (notably, the emergency call interrupt we have injected) should * be cleared by the resets, so this should be 0. */ - TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); TEST_ASSERT(!irqs, "IRQ pending"); } @@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu) irq->type = KVM_S390_INT_EMERGENCY; irq->u.emerg.code = vcpu->id; irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); - TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); } static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 636a70ddac1ea..43fb25ddc3eca 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -39,13 +39,13 @@ static void guest_code(void) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) #define REG_COMPARE32(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%x, 0x%x\n", \ + " values did not match: 0x%x, 0x%x", \ left->reg, right->reg) @@ -82,14 +82,14 @@ void test_read_invalid(struct kvm_vcpu *vcpu) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; } @@ -103,14 +103,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; } @@ -125,12 +125,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) /* Request and verify all valid register sets. */ run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && (run->s390_sieic.ipb >> 16) == 0x501, - "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -161,7 +161,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) } rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, "r11 sync regs value incorrect 0x%llx.", @@ -193,7 +193,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) run->s.regs.gprs[11] = 0xDEADBEEF; run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", -- GitLab From 0adf963b8463faa44653e22e56ce55f747e68868 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:43 +0800 Subject: [PATCH 0312/1405] ASoC: sunxi: sun4i-spdif: Add support for Allwinner H616 The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Since the driver currently only supports the transmit function, support for the H616 is identical to what is currently done for the H6. Signed-off-by: Chen-Yu Tsai Reviewed-by: Andre Przywara Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-4-wens@kernel.org Signed-off-by: Mark Brown --- sound/soc/sunxi/sun4i-spdif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/sunxi/sun4i-spdif.c b/sound/soc/sunxi/sun4i-spdif.c index 702386823d172..f41c309558579 100644 --- a/sound/soc/sunxi/sun4i-spdif.c +++ b/sound/soc/sunxi/sun4i-spdif.c @@ -577,6 +577,11 @@ static const struct of_device_id sun4i_spdif_of_match[] = { .compatible = "allwinner,sun50i-h6-spdif", .data = &sun50i_h6_spdif_quirks, }, + { + .compatible = "allwinner,sun50i-h616-spdif", + /* Essentially the same as the H6, but without RX */ + .data = &sun50i_h6_spdif_quirks, + }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sun4i_spdif_of_match); -- GitLab From 57b3c130d97e45b8a07586e0ae113e43776c5ea8 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:41 +0800 Subject: [PATCH 0313/1405] ASoC: sun4i-spdif: Fix requirements for H6 When the H6 was added to the bindings, only the TX DMA channel was added. As the hardware supports both transmit and receive functions, the binding is missing the RX DMA channel and is thus incorrect. Also, the reset control was not made mandatory. Add the RX DMA channel for SPDIF on H6 by removing the compatible from the list of compatibles that should only have a TX DMA channel. And add the H6 compatible to the list of compatibles that require the reset control to be present. Fixes: b20453031472 ("dt-bindings: sound: sun4i-spdif: Add Allwinner H6 compatible") Signed-off-by: Chen-Yu Tsai Acked-by: Conor Dooley Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-2-wens@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml index 8108c564dd78a..98e2e053fa191 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml @@ -62,6 +62,7 @@ allOf: enum: - allwinner,sun6i-a31-spdif - allwinner,sun8i-h3-spdif + - allwinner,sun50i-h6-spdif then: required: @@ -73,7 +74,6 @@ allOf: contains: enum: - allwinner,sun8i-h3-spdif - - allwinner,sun50i-h6-spdif then: properties: -- GitLab From 7a9dc944f129bb56ef855d9c0b0647bc3e98a56f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 28 Jan 2024 00:32:42 +0800 Subject: [PATCH 0314/1405] ASoC: sun4i-spdif: Add Allwinner H616 compatible The SPDIF hardware block found in the H616 SoC has the same layout as the one found in the H6 SoC, except that it is missing the receiver side. Add a new compatible string for it. Signed-off-by: Chen-Yu Tsai Acked-by: Conor Dooley Reviewed-by: Jernej Skrabec Link: https://msgid.link/r/20240127163247.384439-3-wens@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml index 98e2e053fa191..aa32dc950e72c 100644 --- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml +++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml @@ -22,6 +22,7 @@ properties: - const: allwinner,sun6i-a31-spdif - const: allwinner,sun8i-h3-spdif - const: allwinner,sun50i-h6-spdif + - const: allwinner,sun50i-h616-spdif - items: - const: allwinner,sun8i-a83t-spdif - const: allwinner,sun8i-h3-spdif @@ -63,6 +64,7 @@ allOf: - allwinner,sun6i-a31-spdif - allwinner,sun8i-h3-spdif - allwinner,sun50i-h6-spdif + - allwinner,sun50i-h616-spdif then: required: @@ -74,6 +76,7 @@ allOf: contains: enum: - allwinner,sun8i-h3-spdif + - allwinner,sun50i-h616-spdif then: properties: -- GitLab From f1f6a6b1830a8f1dc92ee26fae76333f446b0663 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 11 Dec 2023 18:05:52 -0800 Subject: [PATCH 0315/1405] e1000e: correct maximum frequency adjustment values The e1000e driver supports hardware with a variety of different clock speeds, and thus a variety of different increment values used for programming its PTP hardware clock. The values currently programmed in e1000e_ptp_init are incorrect. In particular, only two maximum adjustments are used: 24000000 - 1, and 600000000 - 1. These were originally intended to be used with the 96 MHz clock and the 25 MHz clock. Both of these values are actually slightly too high. For the 96 MHz clock, the actual maximum value that can safely be programmed is 23,999,938. For the 25 MHz clock, the maximum value is 599,999,904. Worse, several devices use a 24 MHz clock or a 38.4 MHz clock. These parts are incorrectly assigned one of either the 24million or 600million values. For the 24 MHz clock, this is not a significant issue: its current increment value can support an adjustment up to 7billion in the positive direction. However, the 38.4 KHz clock uses an increment value which can only support up to 230,769,157 before it starts overflowing. To understand where these values come from, consider that frequency adjustments have the form of: new_incval = base_incval + (base_incval * adjustment) / (unit of adjustment) The maximum adjustment is reported in terms of parts per billion: new_incval = base_incval + (base_incval * adjustment) / 1 billion The largest possible adjustment is thus given by the following: max_incval = base_incval + (base_incval * max_adj) / 1 billion Re-arranging to solve for max_adj: max_adj = (max_incval - base_incval) * 1 billion / base_incval We also need to ensure that negative adjustments cannot underflow. This can be achieved simply by ensuring max_adj is always less than 1 billion. Introduce new macros in e1000.h codifying the maximum adjustment in PPB for each frequency given its associated increment values. Also clarify where these values come from by commenting about the above equations. Replace the switch statement in e1000e_ptp_init with one which mirrors the increment value switch statement from e1000e_get_base_timinica. For each device, assign the appropriate maximum adjustment based on its frequency. Some parts can have one of two frequency modes as determined by E1000_TSYNCRXCTL_SYSCFI. Since the new flow directly matches the assignments in e1000e_get_base_timinca, and uses well defined macro names, it is much easier to verify that the resulting maximum adjustments are correct. It also avoids difficult to parse construction such as the "hw->mac.type < e1000_phc_lpt", and the use of fallthrough which was especially confusing when combined with a conditional block. Note that I believe the current increment value configuration used for 24MHz clocks is sub-par, as it leaves at least 3 extra bits available in the INCVALUE register. However, fixing that requires more careful review of the clock rate and associated values. Reported-by: Trey Harrison Fixes: 68fe1d5da548 ("e1000e: Add Support for 38.4MHZ frequency") Fixes: d89777bf0e42 ("e1000e: add support for IEEE-1588 PTP") Signed-off-by: Jacob Keller Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 20 ++++++++++++++++++++ drivers/net/ethernet/intel/e1000e/ptp.c | 22 +++++++++++++++------- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index a187582d22994..ba9c19e6994c9 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n) * bits to count nanoseconds leaving the rest for fractional nonseconds. + * + * Any given INCVALUE also has an associated maximum adjustment value. This + * maximum adjustment value is the largest increase (or decrease) which can be + * safely applied without overflowing the INCVALUE. Since INCVALUE has + * a maximum range of 24 bits, its largest value is 0xFFFFFF. + * + * To understand where the maximum value comes from, consider the following + * equation: + * + * new_incval = base_incval + (base_incval * adjustment) / 1billion + * + * To avoid overflow that means: + * max_incval = base_incval + (base_incval * max_adj) / billion + * + * Re-arranging: + * max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion) */ #define INCVALUE_96MHZ 125 #define INCVALUE_SHIFT_96MHZ 17 #define INCPERIOD_SHIFT_96MHZ 2 #define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ) +#define MAX_PPB_96MHZ 23999900 /* 23,999,900 ppb */ #define INCVALUE_25MHZ 40 #define INCVALUE_SHIFT_25MHZ 18 #define INCPERIOD_25MHZ 1 +#define MAX_PPB_25MHZ 599999900 /* 599,999,900 ppb */ #define INCVALUE_24MHZ 125 #define INCVALUE_SHIFT_24MHZ 14 #define INCPERIOD_24MHZ 3 +#define MAX_PPB_24MHZ 999999999 /* 999,999,999 ppb */ #define INCVALUE_38400KHZ 26 #define INCVALUE_SHIFT_38400KHZ 19 #define INCPERIOD_38400KHZ 1 +#define MAX_PPB_38400KHZ 230769100 /* 230,769,100 ppb */ /* Another drawback of scaling the incvalue by a large factor is the * 64-bit SYSTIM register overflows more quickly. This is dealt with diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 02d871bc112a7..bbcfd529399b0 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch2lan: + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + break; case e1000_pch_lpt: + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; + break; case e1000_pch_spt: + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + break; case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: @@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_lnp: case e1000_pch_ptp: case e1000_pch_nvp: - if ((hw->mac.type < e1000_pch_lpt) || - (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { - adapter->ptp_clock_info.max_adj = 24000000 - 1; - break; - } - fallthrough; + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: - adapter->ptp_clock_info.max_adj = 600000000 - 1; + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; break; default: break; -- GitLab From bbc404d20d1b46d89b461918bc44587620eda200 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 Jan 2024 18:25:36 +0100 Subject: [PATCH 0316/1405] ixgbe: Fix an error handling path in ixgbe_read_iosf_sb_reg_x550() All error handling paths, except this one, go to 'out' where release_swfw_sync() is called. This call balances the acquire_swfw_sync() call done at the beginning of the function. Branch to the error handling path in order to correctly release some resources in case of error. Fixes: ae14a1d8e104 ("ixgbe: Fix IOSF SB access issues") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 6208923e29a2b..c1adc94a5a657 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to read, error %x\n", error); - return -EIO; + ret = -EIO; + goto out; } if (!ret) -- GitLab From ccbca118ef1a71d5faa012b9bb1ecd784e9e2b42 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Mon, 22 Jan 2024 21:35:09 -0800 Subject: [PATCH 0317/1405] NFSv4.1: Assign the right value for initval and retries for rpc timeout Make sure the rpc timeout was assigned with the correct value for initial timeout and max number of retries. Fixes: 57331a59ac0d ("NFSv4.1: Use the nfs_client's rpc timeouts for backchannel") Signed-off-by: Samasth Norway Ananda Reviewed-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f60c93e5a25d6..b969e505c7b77 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) /* Finally, send the reply synchronously */ if (rqstp->bc_to_initval > 0) { timeout.to_initval = rqstp->bc_to_initval; - timeout.to_retries = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_retries; } else { timeout.to_initval = req->rq_xprt->timeout->to_initval; - timeout.to_initval = req->rq_xprt->timeout->to_retries; + timeout.to_retries = req->rq_xprt->timeout->to_retries; } memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); -- GitLab From 0abcac4fe3ca3aeaef40ad53c0b295b2d5f8cbf1 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Thu, 25 Jan 2024 13:19:33 -0800 Subject: [PATCH 0318/1405] firmware: microchip: fix wrong sizeof argument response_msg is a pointer to an unsigned int (u32). So passing just response_msg to sizeof would not print the size of the variable. To get the size of response_msg we need to pass it as a pointer variable. Fixes: ec5b0f1193ad ("firmware: microchip: add PolarFire SoC Auto Update support") Signed-off-by: Samasth Norway Ananda Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 81f5f62e34fce..682e417be5a3e 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -167,7 +167,7 @@ static int mpfs_auto_update_verify_image(struct fw_upload *fw_uploader) u32 *response_msg; int ret; - response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg), + response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg), GFP_KERNEL); if (!response_msg) return -ENOMEM; -- GitLab From 5513c5d0fb3d509cdd0a11afc18441c57eb7c94c Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sun, 28 Jan 2024 19:22:29 +0200 Subject: [PATCH 0319/1405] ASoC: amd: acp: Fix support for a Huawei Matebook laptop Previous commit that added support for Huawei MateBook D16 2021 with Ryzen 4600H (HVY-WXX9 M1010) was incomplete. To activate support for this laptop, the DMI table in acp3x-es83xx machine driver must also be updated. Fixes: b5338b1b901e ("ASoC: amd: acp: Add support for a new Huawei Matebook laptop") Signed-off-by: Marian Postevca Link: https://msgid.link/r/20240128172229.657142-1-posteuca@mutex.one Signed-off-by: Mark Brown --- sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c index f85b85ea4be9c..2b0aa270a3e9d 100644 --- a/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c +++ b/sound/soc/amd/acp/acp3x-es83xx/acp3x-es83xx.c @@ -354,6 +354,14 @@ static const struct dmi_system_id acp3x_es83xx_dmi_table[] = { }, .driver_data = (void *)(ES83XX_ENABLE_DMIC|ES83XX_48_MHZ_MCLK), }, + { + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HVY-WXX9"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "M1010"), + }, + .driver_data = (void *)(ES83XX_ENABLE_DMIC), + }, { .matches = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "HUAWEI"), -- GitLab From e84b01a880f635e3084a361afba41f95ff500d12 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:52:54 -0700 Subject: [PATCH 0320/1405] io_uring/poll: move poll execution helpers higher up In preparation for calling __io_poll_execute() higher up, move the functions to avoid forward declarations. No functional changes in this patch. Signed-off-by: Jens Axboe --- io_uring/poll.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index d59b74a99d4e4..785a5b1910038 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -228,6 +228,26 @@ enum { IOU_POLL_REISSUE = 3, }; +static void __io_poll_execute(struct io_kiocb *req, int mask) +{ + unsigned flags = 0; + + io_req_set_res(req, mask, 0); + req->io_task_work.func = io_poll_task_func; + + trace_io_uring_task_add(req, mask); + + if (!(req->flags & REQ_F_POLL_NO_LAZY)) + flags = IOU_F_TWQ_LAZY_WAKE; + __io_req_task_work_add(req, flags); +} + +static inline void io_poll_execute(struct io_kiocb *req, int res) +{ + if (io_poll_get_ownership(req)) + __io_poll_execute(req, res); +} + /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. @@ -364,26 +384,6 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) } } -static void __io_poll_execute(struct io_kiocb *req, int mask) -{ - unsigned flags = 0; - - io_req_set_res(req, mask, 0); - req->io_task_work.func = io_poll_task_func; - - trace_io_uring_task_add(req, mask); - - if (!(req->flags & REQ_F_POLL_NO_LAZY)) - flags = IOU_F_TWQ_LAZY_WAKE; - __io_req_task_work_add(req, flags); -} - -static inline void io_poll_execute(struct io_kiocb *req, int res) -{ - if (io_poll_get_ownership(req)) - __io_poll_execute(req, res); -} - static void io_poll_cancel_req(struct io_kiocb *req) { io_poll_mark_cancelled(req); -- GitLab From 91e5d765a82fb2c9d0b7ad930d8953208081ddf1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:54:18 -0700 Subject: [PATCH 0321/1405] io_uring/net: un-indent mshot retry path in io_recv_finish() In preparation for putting some retry logic in there, have the done path just skip straight to the end rather than have too much nesting in here. No functional changes in this patch. Signed-off-by: Jens Axboe --- io_uring/net.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 75d494dad7e2c..740c6bfa5b590 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -645,23 +645,27 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, return true; } - if (!mshot_finished) { - if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, - *ret, cflags | IORING_CQE_F_MORE)) { - io_recv_prep_retry(req); - /* Known not-empty or unknown state, retry */ - if (cflags & IORING_CQE_F_SOCK_NONEMPTY || - msg->msg_inq == -1) - return false; - if (issue_flags & IO_URING_F_MULTISHOT) - *ret = IOU_ISSUE_SKIP_COMPLETE; - else - *ret = -EAGAIN; - return true; - } - /* Otherwise stop multishot but use the current result. */ - } + if (mshot_finished) + goto finish; + /* + * Fill CQE for this receive and see if we should keep trying to + * receive from this socket. + */ + if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, + *ret, cflags | IORING_CQE_F_MORE)) { + io_recv_prep_retry(req); + /* Known not-empty or unknown state, retry */ + if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) + return false; + if (issue_flags & IO_URING_F_MULTISHOT) + *ret = IOU_ISSUE_SKIP_COMPLETE; + else + *ret = -EAGAIN; + return true; + } + /* Otherwise stop multishot but use the current result. */ +finish: io_req_set_res(req, *ret, cflags); if (issue_flags & IO_URING_F_MULTISHOT) -- GitLab From 704ea888d646cb9d715662944cf389c823252ee0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 11:57:11 -0700 Subject: [PATCH 0322/1405] io_uring/poll: add requeue return code from poll multishot handling Since our poll handling is edge triggered, multishot handlers retry internally until they know that no more data is available. In preparation for limiting these retries, add an internal return code, IOU_REQUEUE, which can be used to inform the poll backend about the handler wanting to retry, but that this should happen through a normal task_work requeue rather than keep hammering on the issue side for this one request. No functional changes in this patch, nobody is using this return code just yet. Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 8 +++++++- io_uring/poll.c | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 04e33f25919ca..d5495710c1787 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -15,11 +15,17 @@ #include #endif - enum { IOU_OK = 0, IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, + /* + * Requeue the task_work to restart operations on this request. The + * actual value isn't important, should just be not an otherwise + * valid error code, yet less than -MAX_ERRNO and valid internally. + */ + IOU_REQUEUE = -3072, + /* * Intended only when both IO_URING_F_MULTISHOT is passed * to indicate to the poll runner that multishot should be diff --git a/io_uring/poll.c b/io_uring/poll.c index 785a5b1910038..7513afc7b702e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -226,6 +226,7 @@ enum { IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, IOU_POLL_REISSUE = 3, + IOU_POLL_REQUEUE = 4, }; static void __io_poll_execute(struct io_kiocb *req, int mask) @@ -329,6 +330,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) int ret = io_poll_issue(req, ts); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; + else if (ret == IOU_REQUEUE) + return IOU_POLL_REQUEUE; if (ret < 0) return ret; } @@ -351,8 +354,12 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts) int ret; ret = io_poll_check_events(req, ts); - if (ret == IOU_POLL_NO_ACTION) + if (ret == IOU_POLL_NO_ACTION) { return; + } else if (ret == IOU_POLL_REQUEUE) { + __io_poll_execute(req, 0); + return; + } io_poll_remove_entries(req); io_poll_tw_hash_eject(req, ts); -- GitLab From 76b367a2d83163cf19173d5cb0b562acbabc8eac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Jan 2024 12:00:58 -0700 Subject: [PATCH 0323/1405] io_uring/net: limit inline multishot retries If we have multiple clients and some/all are flooding the receives to such an extent that we can retry a LOT handling multishot receives, then we can be starving some clients and hence serving traffic in an imbalanced fashion. Limit multishot retry attempts to some arbitrary value, whose only purpose serves to ensure that we don't keep serving a single connection for way too long. We default to 32 retries, which should be more than enough to provide fairness, yet not so small that we'll spend too much time requeuing rather than handling traffic. Cc: stable@vger.kernel.org Depends-on: 704ea888d646 ("io_uring/poll: add requeue return code from poll multishot handling") Depends-on: 1e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()") Depends-on: e84b01a880f6 ("io_uring/poll: move poll execution helpers higher up") Fixes: b3fdea6ecb55 ("io_uring: multishot recv") Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg") Link: https://github.com/axboe/liburing/issues/1043 Signed-off-by: Jens Axboe --- io_uring/net.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 740c6bfa5b590..a12ff69e68434 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -60,6 +60,7 @@ struct io_sr_msg { unsigned len; unsigned done_io; unsigned msg_flags; + unsigned nr_multishot_loops; u16 flags; /* initialised and used only by !msg send variants */ u16 addr_len; @@ -70,6 +71,13 @@ struct io_sr_msg { struct io_kiocb *notif; }; +/* + * Number of times we'll try and do receives if there's more data. If we + * exceed this limit, then add us to the back of the queue and retry from + * there. This helps fairness between flooding clients. + */ +#define MULTISHOT_MAX_RETRY 32 + static inline bool io_check_multishot(struct io_kiocb *req, unsigned int issue_flags) { @@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg_flags |= MSG_CMSG_COMPAT; #endif sr->done_io = 0; + sr->nr_multishot_loops = 0; return 0; } @@ -654,12 +663,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, */ if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER, *ret, cflags | IORING_CQE_F_MORE)) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE; + io_recv_prep_retry(req); /* Known not-empty or unknown state, retry */ - if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) - return false; + if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) { + if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY) + return false; + /* mshot retries exceeded, force a requeue */ + sr->nr_multishot_loops = 0; + mshot_retry_ret = IOU_REQUEUE; + } if (issue_flags & IO_URING_F_MULTISHOT) - *ret = IOU_ISSUE_SKIP_COMPLETE; + *ret = mshot_retry_ret; else *ret = -EAGAIN; return true; -- GitLab From eef5c7b28dbecd6b141987a96db6c54e49828102 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 29 Jan 2024 13:18:56 +0000 Subject: [PATCH 0324/1405] cxl/pci: Skip to handle RAS errors if CXL.mem device is detached The PCI AER model is an awkward fit for CXL error handling. While the expectation is that a PCI device can escalate to link reset to recover from an AER event, the same reset on CXL amounts to a surprise memory hotplug of massive amounts of memory. At present, the CXL error handler attempts some optimistic error handling to unbind the device from the cxl_mem driver after reaping some RAS register values. This results in a "hopeful" attempt to unplug the memory, but there is no guarantee that will succeed. A subsequent AER notification after the memdev unbind event can no longer assume the registers are mapped. Check for memdev bind before reaping status register values to avoid crashes of the form: BUG: unable to handle page fault for address: ffa00000195e9100 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page [...] RIP: 0010:__cxl_handle_ras+0x30/0x110 [cxl_core] [...] Call Trace: ? __die+0x24/0x70 ? page_fault_oops+0x82/0x160 ? kernelmode_fixup_or_oops+0x84/0x110 ? exc_page_fault+0x113/0x170 ? asm_exc_page_fault+0x26/0x30 ? __pfx_dpc_reset_link+0x10/0x10 ? __cxl_handle_ras+0x30/0x110 [cxl_core] ? find_cxl_port+0x59/0x80 [cxl_core] cxl_handle_rp_ras+0xbc/0xd0 [cxl_core] cxl_error_detected+0x6c/0xf0 [cxl_core] report_error_detected+0xc7/0x1c0 pci_walk_bus+0x73/0x90 pcie_do_recovery+0x23f/0x330 Longer term, the unbind and PCI_ERS_RESULT_DISCONNECT behavior might need to be replaced with a new PCI_ERS_RESULT_PANIC. Fixes: 6ac07883dbb5 ("cxl/pci: Add RCH downstream port error logging") Cc: stable@vger.kernel.org Suggested-by: Dan Williams Signed-off-by: Li Ming Link: https://lore.kernel.org/r/20240129131856.2458980-1-ming4.li@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 43 ++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 6c9c8d92f8f71..480489f5644e1 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -932,11 +932,21 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } void cxl_cor_error_detected(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); - cxl_handle_endpoint_cor_ras(cxlds); + cxl_handle_endpoint_cor_ras(cxlds); + } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); @@ -948,16 +958,25 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_endpoint_ras(cxlds); + } - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: -- GitLab From c44fc98f0a8ffd94fa0bd291928e7e312ffc7ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= Date: Fri, 26 Jan 2024 11:49:35 +0100 Subject: [PATCH 0325/1405] net: dsa: qca8k: fix illegal usage of GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When working with GPIO, its direction must be set either when the GPIO is requested by gpiod_get*() or later on by one of the gpiod_direction_*() functions. Neither of this is done here which results in undefined behavior on some systems. As the reset GPIO is used right after it is requested here, it makes sense to configure it as GPIOD_OUT_HIGH right away. With that, the following gpiod_set_value_cansleep(1) becomes redundant and can be safely removed. Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature") Signed-off-by: Michal Vokáč Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/1706266175-3408-1-git-send-email-michal.vokac@ysoft.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/qca8k-8xxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c51f40960961f..7a864329cb726 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -2051,12 +2051,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ -- GitLab From 59c93583491ab15db109f9902524d241c4fa4c0b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Jan 2024 12:13:08 -0800 Subject: [PATCH 0326/1405] selftests: net: add missing config for nftables-backed iptables Modern OSes use iptables implementation with nf_tables as a backend, e.g.: $ iptables -V iptables v1.8.8 (nf_tables) Pablo points out that we need CONFIG_NFT_COMPAT to make that work, otherwise we see a lot of: Warning: Extension DNAT revision 0 not supported, missing kernel module? with DNAT being just an example here, other modules we need include udp, TTL, length etc. Link: https://lore.kernel.org/r/20240126201308.2903602-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 56da5d52674cf..d4b38177ed04a 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -60,6 +60,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y -- GitLab From 881f78f472556ed05588172d5b5676b48dc48240 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 29 Jan 2024 20:27:23 -0800 Subject: [PATCH 0327/1405] xfs: remove conditional building of rt geometry validator functions I mistakenly turned off CONFIG_XFS_RT in the Kconfig file for arm64 variant of the djwong-wtf git branch. Unfortunately, it took me a good hour to figure out that RT wasn't built because this is what got printed to dmesg: XFS (sda2): realtime geometry sanity check failed XFS (sda2): Metadata corruption detected at xfs_sb_read_verify+0x170/0x190 [xfs], xfs_sb block 0x0 Whereas I would have expected: XFS (sda2): Not built with CONFIG_XFS_RT XFS (sda2): RT mount failed The root cause of these problems is the conditional compilation of the new functions xfs_validate_rtextents and xfs_compute_rextslog that I introduced in the two commits listed below. The !RT versions of these functions return false and 0, respectively, which causes primary superblock validation to fail, which explains the first message. Move the two functions to other parts of libxfs that are not conditionally defined by CONFIG_XFS_RT and remove the broken stubs so that validation works again. Fixes: e14293803f4e ("xfs: don't allow overly small or large realtime volumes") Fixes: a6a38f309afc ("xfs: make rextslog computation consistent with mkfs") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_rtbitmap.c | 14 -------------- fs/xfs/libxfs/xfs_rtbitmap.h | 16 ---------------- fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ fs/xfs/libxfs/xfs_sb.h | 2 ++ fs/xfs/libxfs/xfs_types.h | 12 ++++++++++++ fs/xfs/scrub/rtbitmap.c | 1 + fs/xfs/scrub/rtsummary.c | 1 + 7 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 31100120b2c58..e31663cb7b434 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1118,20 +1118,6 @@ xfs_rtbitmap_blockcount( return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize); } -/* - * Compute the maximum level number of the realtime summary file, as defined by - * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct - * use of rt volumes with more than 2^32 extents. - */ -uint8_t -xfs_compute_rextslog( - xfs_rtbxlen_t rtextents) -{ - if (!rtextents) - return 0; - return xfs_highbit64(rtextents); -} - /* * Compute the number of rtbitmap words needed to populate every block of a * bitmap that is large enough to track the given number of rt extents. diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 274dc7dae1faf..152a66750af55 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -351,20 +351,6 @@ xfs_rtfree_extent( int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); -uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); - -/* Do we support an rt volume having this number of rtextents? */ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents); unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp, @@ -383,8 +369,6 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp, # define xfs_rtsummary_read_buf(a,b) (-ENOSYS) # define xfs_rtbuf_cache_relse(a) (0) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) -# define xfs_compute_rextslog(rtx) (0) -# define xfs_validate_rtextents(rtx) (false) static inline xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents) { diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 4a9e8588f4c98..5bb6e2bd6deee 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1377,3 +1377,17 @@ xfs_validate_stripe_geometry( } return true; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 19134b23c10be..2e8e8d63d4eb2 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool silent); +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 20b5375f2d9c9..62e02d5380ad3 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -251,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 441ca99776527..46583517377ff 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -15,6 +15,7 @@ #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bit.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index fabd0ed9dfa67..b1ff4f33324a7 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -16,6 +16,7 @@ #include "xfs_rtbitmap.h" #include "xfs_bit.h" #include "xfs_bmap.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -- GitLab From 60365049ccbacd101654a66ddcb299abfabd4fc5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 26 Jan 2024 09:32:20 +0100 Subject: [PATCH 0328/1405] ipv6: Ensure natural alignment of const ipv6 loopback and router addresses On a parisc64 kernel I sometimes notice this kernel warning: Kernel unaligned access to 0x40ff8814 at ndisc_send_skb+0xc0/0x4d8 The address 0x40ff8814 points to the in6addr_linklocal_allrouters variable and the warning simply means that some ipv6 function tries to read a 64-bit word directly from the not-64-bit aligned in6addr_linklocal_allrouters variable. Unaligned accesses are non-critical as the architecture or exception handlers usually will fix it up at runtime. Nevertheless it may trigger a performance penality for some architectures. For details read the "unaligned-memory-access" kernel documentation. The patch below ensures that the ipv6 loopback and router addresses will always be naturally aligned. This prevents the unaligned accesses for all architectures. Signed-off-by: Helge Deller Fixes: 034dfc5df99eb ("ipv6: export in6addr_loopback to modules") Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/ZbNuFM1bFqoH-UoY@p100 Signed-off-by: Paolo Abeni --- net/ipv6/addrconf_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6bdb..c008d21925d7f 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) -- GitLab From 2fa28abd1090562b4d9bc4aedd70abcca26561af Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 23 Jan 2024 14:30:54 +0100 Subject: [PATCH 0329/1405] arm64: Revert "scs: Work around full LTO issue with dynamic SCS" This reverts commit 8c5a19cb17a71e ("arm64: scs: Work around full LTO issue with dynamic SCS"), which did not quite fix the issue as intended. Apparently, -fno-unwind-tables is ignored for the final full LTO link when it is set on any of the objects, resulting in an early boot crash due to the SCS patching code patching itself, and attempting to pop the return address from the shadow stack while the associated push was still a PACIASP instruction when it executed. Reported-by: Sami Tolvanen Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Link: https://lore.kernel.org/r/20240123133052.1417449-5-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index e5d03a7039b4b..d95b3d6b471a7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,13 +73,7 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o - -# We need to prevent the SCS patching code from patching itself. Using -# -mbranch-protection=none here to avoid the patchable PAC opcodes from being -# generated triggers an issue with full LTO on Clang, which stops emitting PAC -# instructions altogether. So instead, omit the unwind tables used by the -# patching code, so it will not be able to locate its own PAC instructions. -CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables +CFLAGS_patch-scs.o += -mbranch-protection=none # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so -- GitLab From d104a6fef3fec137d8d44961224ab76edbd6cbc7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 23 Jan 2024 14:30:55 +0100 Subject: [PATCH 0330/1405] arm64: scs: Disable LTO for SCS patching code Full LTO takes the '-mbranch-protection=none' passed to the compiler when generating the dynamic shadow call stack patching code as a hint to stop emitting PAC instructions altogether. (Thin LTO appears unaffected by this) Work around this by disabling LTO for the compilation unit, which appears to convince the linker that it should still use PAC in the rest of the kernel.. Fixes: 3b619e22c460 ("arm64: implement dynamic shadow call stack for Clang") Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Tested-by: Sami Tolvanen Link: https://lore.kernel.org/r/20240123133052.1417449-6-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d95b3d6b471a7..467cb71172730 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o + +# We need to prevent the SCS patching code from patching itself. Using +# -mbranch-protection=none here to avoid the patchable PAC opcodes from being +# generated triggers an issue with full LTO on Clang, which stops emitting PAC +# instructions altogether. So disable LTO as well for the compilation unit. CFLAGS_patch-scs.o += -mbranch-protection=none +CFLAGS_REMOVE_patch-scs.o += $(CC_FLAGS_LTO) # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so -- GitLab From 4896bb7c0b31a0a3379b290ea7729900c59e0c69 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:41 +0100 Subject: [PATCH 0331/1405] net: stmmac: do not clear TBS enable bit on link up/down With the dma conf being reallocated on each call to stmmac_open(), any information in there is lost, unless we specifically handle it. The STMMAC_TBS_EN bit is set when adding an etf qdisc, and the etf qdisc therefore would stop working when link was set down and then back up. Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Cc: stable@vger.kernel.org Signed-off-by: Esben Haabendal Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b334eb16da23a..25519952f754c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); -- GitLab From 3b12ec8f618ebaccfe43ea4621a6f5fb586edef8 Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Fri, 26 Jan 2024 10:10:42 +0100 Subject: [PATCH 0332/1405] net: stmmac: dwmac-imx: set TSO/TBS TX queues default settings TSO and TBS cannot coexist. For now we set i.MX Ethernet QOS controller to use the first TX queue with TSO and the rest for TBS. TX queues with TBS can support etf qdisc hw offload. Signed-off-by: Esben Haabendal Reviewed-by: Kurt Kanzenbach Reviewed-by: Vadim Fedorenko Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 8f730ada71f91..6b65420e11b5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY; + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + for (int i = 1; i < plat_dat->tx_queues_to_use; i++) + plat_dat->tx_queues_cfg[i].tbs_en = 1; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; -- GitLab From c7767f5c43df2c453af4651d1f58f489e3eb4ac1 Mon Sep 17 00:00:00 2001 From: Kevin Brodsky Date: Mon, 29 Jan 2024 15:47:48 +0000 Subject: [PATCH 0333/1405] arm64: vdso32: Remove unused vdso32-offsets.h Commit 2d071968a405 ("arm64: compat: Remove 32-bit sigreturn code from the vDSO") removed all VDSO_* symbols in the compat vDSO. As a result, vdso32-offsets.h is now empty and therefore unused. Time to remove it. Signed-off-by: Kevin Brodsky Link: https://lore.kernel.org/r/20240129154748.1727759-1-kevin.brodsky@arm.com Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- arch/arm64/include/asm/vdso.h | 3 --- arch/arm64/kernel/vdso32/Makefile | 9 --------- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 47ecc4cff9d25..a88cdf9106871 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -195,7 +195,7 @@ vdso_prepare: prepare0 include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so ifdef CONFIG_COMPAT_VDSO $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \ - include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so + arch/arm64/kernel/vdso32/vdso.so endif endif diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index b4ae321099327..4305995c8f82f 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,9 +17,6 @@ #ifndef __ASSEMBLY__ #include -#ifdef CONFIG_COMPAT_VDSO -#include -#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 2266fcdff78a0..f5f80fdce0fe7 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -127,9 +127,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) -include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE - $(call if_changed,vdsosym) - # Strip rule for vdso.so $(obj)/vdso.so: OBJCOPYFLAGS := -S $(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE @@ -166,9 +163,3 @@ quiet_cmd_vdsoas = AS32 $@ quiet_cmd_vdsomunge = MUNGE $@ cmd_vdsomunge = $(obj)/$(munge) $< $@ - -# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) -gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh -quiet_cmd_vdsosym = VDSOSYM $@ -# The AArch64 nm should be able to read an AArch32 binary - cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -- GitLab From aa2b2eb3934859904c287bf5434647ba72e14c1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Jan 2024 16:55:32 +0000 Subject: [PATCH 0334/1405] llc: call sock_orphan() at release time syzbot reported an interesting trace [1] caused by a stale sk->sk_wq pointer in a closed llc socket. In commit ff7b11aa481f ("net: socket: set sock->sk to NULL after calling proto_ops::release()") Eric Biggers hinted that some protocols are missing a sock_orphan(), we need to perform a full audit. In net-next, I plan to clear sock->sk from sock_orphan() and amend Eric patch to add a warning. [1] BUG: KASAN: slab-use-after-free in list_empty include/linux/list.h:373 [inline] BUG: KASAN: slab-use-after-free in waitqueue_active include/linux/wait.h:127 [inline] BUG: KASAN: slab-use-after-free in sock_def_write_space_wfree net/core/sock.c:3384 [inline] BUG: KASAN: slab-use-after-free in sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 Read of size 8 at addr ffff88802f4fc880 by task ksoftirqd/1/27 CPU: 1 PID: 27 Comm: ksoftirqd/1 Not tainted 6.8.0-rc1-syzkaller-00049-g6098d87eaf31 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0xc4/0x620 mm/kasan/report.c:488 kasan_report+0xda/0x110 mm/kasan/report.c:601 list_empty include/linux/list.h:373 [inline] waitqueue_active include/linux/wait.h:127 [inline] sock_def_write_space_wfree net/core/sock.c:3384 [inline] sock_wfree+0x9a8/0x9d0 net/core/sock.c:2468 skb_release_head_state+0xa3/0x2b0 net/core/skbuff.c:1080 skb_release_all net/core/skbuff.c:1092 [inline] napi_consume_skb+0x119/0x2b0 net/core/skbuff.c:1404 e1000_unmap_and_free_tx_resource+0x144/0x200 drivers/net/ethernet/intel/e1000/e1000_main.c:1970 e1000_clean_tx_irq drivers/net/ethernet/intel/e1000/e1000_main.c:3860 [inline] e1000_clean+0x4a1/0x26e0 drivers/net/ethernet/intel/e1000/e1000_main.c:3801 __napi_poll.constprop.0+0xb4/0x540 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x956/0xe90 net/core/dev.c:6778 __do_softirq+0x21a/0x8de kernel/softirq.c:553 run_ksoftirqd kernel/softirq.c:921 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:913 smpboot_thread_fn+0x660/0xa10 kernel/smpboot.c:164 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Allocated by task 5167: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 unpoison_slab_object mm/kasan/common.c:314 [inline] __kasan_slab_alloc+0x81/0x90 mm/kasan/common.c:340 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slub.c:3813 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_lru+0x142/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Freed by task 0: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3f/0x60 mm/kasan/generic.c:640 poison_slab_object mm/kasan/common.c:241 [inline] __kasan_slab_free+0x121/0x1b0 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kmem_cache_free+0x129/0x350 mm/slub.c:4363 i_callback+0x43/0x70 fs/inode.c:249 rcu_do_batch kernel/rcu/tree.c:2158 [inline] rcu_core+0x819/0x1680 kernel/rcu/tree.c:2433 __do_softirq+0x21a/0x8de kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x33/0x50 mm/kasan/common.c:47 __kasan_record_aux_stack+0xba/0x100 mm/kasan/generic.c:586 __call_rcu_common.constprop.0+0x9a/0x7b0 kernel/rcu/tree.c:2683 destroy_inode+0x129/0x1b0 fs/inode.c:315 iput_final fs/inode.c:1739 [inline] iput.part.0+0x560/0x7b0 fs/inode.c:1765 iput+0x5c/0x80 fs/inode.c:1755 dentry_unlink_inode+0x292/0x430 fs/dcache.c:400 __dentry_kill+0x1ca/0x5f0 fs/dcache.c:603 dput.part.0+0x4ac/0x9a0 fs/dcache.c:845 dput+0x1f/0x30 fs/dcache.c:835 __fput+0x3b9/0xb70 fs/file_table.c:384 task_work_run+0x14d/0x240 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b The buggy address belongs to the object at ffff88802f4fc800 which belongs to the cache sock_inode_cache of size 1408 The buggy address is located 128 bytes inside of freed 1408-byte region [ffff88802f4fc800, ffff88802f4fcd80) The buggy address belongs to the physical page: page:ffffea0000bd3e00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2f4f8 head:ffffea0000bd3e00 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 anon flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888013b06b40 0000000000000000 0000000000000001 raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Reclaimable, gfp_mask 0xd20d0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_RECLAIMABLE), pid 4956, tgid 4956 (sshd), ts 31423924727, free_ts 0 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d0/0x350 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0xa28/0x3780 mm/page_alloc.c:3311 __alloc_pages+0x22f/0x2440 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page mm/slub.c:2190 [inline] allocate_slab mm/slub.c:2354 [inline] new_slab+0xcc/0x3a0 mm/slub.c:2407 ___slab_alloc+0x4af/0x19a0 mm/slub.c:3540 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3625 __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] kmem_cache_alloc_lru+0x379/0x6f0 mm/slub.c:3879 alloc_inode_sb include/linux/fs.h:3019 [inline] sock_alloc_inode+0x25/0x1c0 net/socket.c:308 alloc_inode+0x5d/0x220 fs/inode.c:260 new_inode_pseudo+0x16/0x80 fs/inode.c:1005 sock_alloc+0x40/0x270 net/socket.c:634 __sock_create+0xbc/0x800 net/socket.c:1535 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x14c/0x260 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1718 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b page_owner free stack trace missing Memory state around the buggy address: ffff88802f4fc780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88802f4fc800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88802f4fc880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88802f4fc900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88802f4fc980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 43815482370c ("net: sock_def_readable() and friends RCU conversion") Reported-and-tested-by: syzbot+32b89eaa102b372ff76d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Eric Biggers Cc: Kuniyuki Iwashima Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240126165532.3396702-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/llc/af_llc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20551cfb7da6d..fde1140d899ef 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; -- GitLab From c16dfab33f99fc3ff43d48253bc2784ccb84c1de Mon Sep 17 00:00:00 2001 From: Kenzo Gomez Date: Sat, 27 Jan 2024 17:46:21 +0100 Subject: [PATCH 0335/1405] ALSA: hda: cs35l41: Support additional ASUS Zenbook UX3402VA Add new model entry into configuration table. Signed-off-by: Kenzo Gomez Link: https://lore.kernel.org/r/20240127164621.26431-1-kenzo.sgomez@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 35277ce890a46..59504852adc69 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -76,6 +76,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, { "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, + { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104317F3", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, @@ -410,6 +411,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431533", generic_dsd_config }, { "CSC3551", "10431573", generic_dsd_config }, { "CSC3551", "10431663", generic_dsd_config }, + { "CSC3551", "104316A3", generic_dsd_config }, { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, { "CSC3551", "104317F3", generic_dsd_config }, -- GitLab From 15d6daad8f8add8ef99b6e4e2b5bf0db48e1a8db Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 10:09:18 -0300 Subject: [PATCH 0336/1405] tools headers x86 cpufeatures: Sync with the kernel sources to pick TDX, Zen, APIC MSR fence changes To pick the changes from: 1e536e10689700e0 ("x86/cpu: Detect TDX partial write machine check erratum") 765a0542fdc7aad7 ("x86/virt/tdx: Detect TDX during kernel boot") 30fa92832f405d5a ("x86/CPU/AMD: Add ZenX generations flags") 04c3024560d3a14a ("x86/barrier: Do not serialize MSR accesses on AMD") This causes these perf files to be rebuilt and brings some X86_FEATURE that will be used when updating the copies of tools/arch/x86/lib/mem{cpy,set}_64.S with the kernel sources: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Borislav Petkov Cc: Dave Hansen Cc: Ian Rogers Cc: Jiri Olsa Cc: Kai Huang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f4542d2718f4f..29cb275a219d7 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,6 +198,7 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -308,10 +309,14 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ - #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ #define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -495,6 +500,7 @@ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -- GitLab From be220d2e5544ff094142d263db5cf94d034b5e39 Mon Sep 17 00:00:00 2001 From: Chhayly Leang Date: Fri, 26 Jan 2024 15:09:12 +0700 Subject: [PATCH 0337/1405] ALSA: hda: cs35l41: Support ASUS Zenbook UM3402YAR Adds sound support for ASUS Zenbook UM3402YAR with missing DSD Signed-off-by: Chhayly Leang Link: https://lore.kernel.org/r/20240126080912.87422-1-clw.leang@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 59504852adc69..d74cf11eef1ea 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -76,6 +76,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431533", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431573", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 }, { "10431663", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 1000, 4500, 24 }, + { "10431683", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "104316A3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316D3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "104316F3", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, @@ -411,6 +412,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431533", generic_dsd_config }, { "CSC3551", "10431573", generic_dsd_config }, { "CSC3551", "10431663", generic_dsd_config }, + { "CSC3551", "10431683", generic_dsd_config }, { "CSC3551", "104316A3", generic_dsd_config }, { "CSC3551", "104316D3", generic_dsd_config }, { "CSC3551", "104316F3", generic_dsd_config }, -- GitLab From f7c4cb4a3f77867612b45c6327f80eac58a8ce65 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 25 Jan 2024 22:35:19 +0000 Subject: [PATCH 0338/1405] ALSA: pcm: Add missing formats to formats list Add 4 missing formats to 'snd_pcm_format_names' array in order to be able to get their names with 'snd_pcm_format_name' function. Signed-off-by: Ivan Orlov Link: https://lore.kernel.org/r/20240125223522.1122765-1-ivan.orlov0322@gmail.com Signed-off-by: Takashi Iwai --- sound/core/pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/pcm.c b/sound/core/pcm.c index a09f0154e6a70..d0788126cbab1 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -211,6 +211,10 @@ static const char * const snd_pcm_format_names[] = { FORMAT(DSD_U32_LE), FORMAT(DSD_U16_BE), FORMAT(DSD_U32_BE), + FORMAT(S20_LE), + FORMAT(S20_BE), + FORMAT(U20_LE), + FORMAT(U20_BE), }; /** -- GitLab From efe80f9c9063228136bc3824f7ac6b4ff2e273b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 10:45:24 -0300 Subject: [PATCH 0339/1405] tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench' This is to get the changes from: 94ea9c05219518ef ("x86/headers: Replace #include with #include ") 10f4c9b9a33b7df0 ("x86/asm: Fix build of UML with KASAN") That addresses these perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Cc: Vincent Whitchurch Link: https://lore.kernel.org/lkml/ZbkIKpKdNqOFdMwJ@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 4 ++-- tools/arch/x86/lib/memset_64.S | 4 ++-- tools/perf/util/include/linux/linkage.h | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index d055b82d22ccd..59cf6f9065aa8 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright 2002 Andi Kleen */ +#include #include #include #include #include -#include .section .noinstr.text, "ax" @@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS(memcpy, __memcpy) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 7c59a704c4584..0199d56cb479d 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -1,10 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include -#include .section .noinstr.text, "ax" @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS(memset, __memset) +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 75e2248416f55..178b00205fe6a 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -115,6 +115,10 @@ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) #endif +#ifndef SYM_FUNC_ALIAS_MEMFUNC +#define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + // In the kernel sources (include/linux/cfi_types.h), this has a different // definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang // definition: -- GitLab From 7814fe24a6211a610db0b408d87420403b5b7a36 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 24 Jan 2024 09:43:57 +0000 Subject: [PATCH 0340/1405] perf evlist: Fix evlist__new_default() for > 1 core PMU The 'Session topology' test currently fails with this message when evlist__new_default() opens more than one event: 32: Session topology : --- start --- templ file: /tmp/perf-test-vv5YzZ Using CPUID 0x00000000410fd070 Opening: unknown-hardware:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xb00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 4 Opening: unknown-hardware:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 = 5 non matching sample_type FAILED tests/topology.c:73 can't get session ---- end ---- Session topology: FAILED! This is because when re-opening the file and parsing the header, Perf expects that any file that has more than one event has the sample ID flag set. Perf record already sets the flag in a similar way when there is more than one event, so add the same logic to evlist__new_default(). evlist__new_default() is only currently used in tests, so I don't expect this change to have any other side effects. The other tests that use it don't save and re-open the file so don't hit this issue. The session topology test has been failing on Arm big.LITTLE platforms since commit 251aa040244a3b17 ("perf parse-events: Wildcard most "numeric" events") when evlist__new_default() started opening multiple events for 'cycles'. Fixes: 251aa040244a3b17 ("perf parse-events: Wildcard most "numeric" events") Reviewed-by: Ian Rogers Signed-off-by: James Clark [ This was failing as well on a Rocket Lake Refresh/14700k Intel hybrid system - Arnaldo ] Tested-by: Arnaldo Carvalho de Melo Tested-by: Ian Rogers Tested-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Changbin Du Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Closes: https://lore.kernel.org/lkml/CAP-5=fWVQ-7ijjK3-w1q+k2WYVNHbAcejb-xY0ptbjRw476VKA@mail.gmail.com/ Link: https://lore.kernel.org/r/20240124094358.489372-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 95f25e9fb994a..55a300a0977b4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -103,7 +103,14 @@ struct evlist *evlist__new_default(void) err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); if (err) { evlist__delete(evlist); - evlist = NULL; + return NULL; + } + + if (evlist->core.nr_entries > 1) { + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__set_sample_id(evsel, /*can_sample_identifier=*/false); } return evlist; -- GitLab From 1f8c43b09ec6906079ac1f02e2b0a381c6f48c6c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jan 2024 11:44:00 -0300 Subject: [PATCH 0341/1405] tools include UAPI: Sync linux/mount.h copy with the kernel sources To pick the changes from: 35e27a5744131996 ("fs: keep struct mnt_id_req extensible") b4c2bea8ceaa50cd ("add listmount(2) syscall") 46eae99ef73302f9 ("add statmount(2) syscall") That doesn't change anything in tools this time as nothing that is harvested by the beauty scripts got changed: $ ls -1 tools/perf/trace/beauty/*mount*sh tools/perf/trace/beauty/fsmount.sh tools/perf/trace/beauty/mount_flags.sh tools/perf/trace/beauty/move_mount_flags.sh $ This addresses this perf build warning. Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Miklos Szeredi Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZbkMiB7ZcOsLP2V5@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index bb242fdcfe6b2..ad5478dbad007 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -138,4 +138,74 @@ struct mount_attr { /* List of all mount_attr versions. */ #define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 __spare1; + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 __spare2[50]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ + #endif /* _UAPI_LINUX_MOUNT_H */ -- GitLab From 12b17b4eb82a41977eb848048137b5908d52845c Mon Sep 17 00:00:00 2001 From: Leonard Dallmayr Date: Fri, 5 Jan 2024 13:35:51 +0100 Subject: [PATCH 0342/1405] USB: serial: cp210x: add ID for IMST iM871A-USB The device IMST USB-Stick for Smart Meter is a rebranded IMST iM871A-USB Wireless M-Bus USB-adapter. It is used to read wireless water, gas and electricity meters. Signed-off-by: Leonard Dallmayr Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 1e61fe0431715..923e0ed85444b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x87ED) }, /* IMST USB-Stick for Smart Meter */ { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ -- GitLab From 129690fb229a20b6e563a77a2c85266acecf20bc Mon Sep 17 00:00:00 2001 From: JackBB Wu Date: Tue, 23 Jan 2024 17:39:48 +0800 Subject: [PATCH 0343/1405] USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e Add support for Dell DW5826e with USB-id 0x413c:0x8217 & 0x413c:0x8218. It is 0x413c:0x8217 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8217 Rev= 5.04 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=qcserial E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=usbfs E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=qcserial E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=qcserial E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms It is 0x413c:0x8218 T: Bus=02 Lev=01 Prnt=01 Port=05 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=413c ProdID=8218 Rev= 0.00 S: Manufacturer=DELL S: Product=COMPAL Electronics EXM-G1A S: SerialNumber=359302940050401 C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=qcserial E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: JackBB Wu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index b1e844bf31f81..703a9c5635573 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -184,6 +184,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81d0)}, /* Dell Wireless 5819 */ {DEVICE_SWI(0x413c, 0x81d1)}, /* Dell Wireless 5818 */ {DEVICE_SWI(0x413c, 0x81d2)}, /* Dell Wireless 5818 */ + {DEVICE_SWI(0x413c, 0x8217)}, /* Dell Wireless DW5826e */ + {DEVICE_SWI(0x413c, 0x8218)}, /* Dell Wireless DW5826e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ -- GitLab From d877550eaf2dc9090d782864c96939397a3c6835 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 29 Jan 2024 22:36:03 -0800 Subject: [PATCH 0344/1405] x86/fpu: Stop relying on userspace for info to fault in xsave buffer Before this change, the expected size of the user space buffer was taken from fx_sw->xstate_size. fx_sw->xstate_size can be changed from user-space, so it is possible construct a sigreturn frame where: * fx_sw->xstate_size is smaller than the size required by valid bits in fx_sw->xfeatures. * user-space unmaps parts of the sigrame fpu buffer so that not all of the buffer required by xrstor is accessible. In this case, xrstor tries to restore and accesses the unmapped area which results in a fault. But fault_in_readable succeeds because buf + fx_sw->xstate_size is within the still mapped area, so it goes back and tries xrstor again. It will spin in this loop forever. Instead, fault in the maximum size which can be touched by XRSTOR (taken from fpstate->user_size). [ dhansen: tweak subject / changelog ] Fixes: fcb3635f5018 ("x86/fpu/signal: Handle #PF in the direct restore path") Reported-by: Konstantin Bogomolov Suggested-by: Thomas Gleixner Signed-off-by: Andrei Vagin Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240130063603.3392627-1-avagin%40google.com --- arch/x86/kernel/fpu/signal.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbde5bf..247f2225aa9f3 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* -- GitLab From a3fa9838e8140584a6f338e8516f2b05d3bea812 Mon Sep 17 00:00:00 2001 From: Patrick Rudolph Date: Tue, 30 Jan 2024 20:32:56 +0530 Subject: [PATCH 0345/1405] regulator (max5970): Fix IRQ handler The max5970 datasheet gives the impression that IRQ status bits must be cleared by writing a one to set bits, as those are marked with 'R/C', however tests showed that a zero must be written. Fixes an IRQ storm as the interrupt handler actually clears the IRQ status bits. Signed-off-by: Patrick Rudolph Signed-off-by: Naresh Solanki Link: https://msgid.link/r/20240130150257.3643657-1-naresh.solanki@9elements.com Signed-off-by: Mark Brown --- drivers/regulator/max5970-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index bc88a40a88d4c..830a1c4cd7057 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, return ret; if (*val) - return regmap_write(map, reg, *val); + return regmap_write(map, reg, 0); return 0; } -- GitLab From 6500ad28fd5d67d5ca0fee9da73c463090842440 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Jan 2024 10:40:53 +0100 Subject: [PATCH 0346/1405] spi: sh-msiof: avoid integer overflow in constants cppcheck rightfully warned: drivers/spi/spi-sh-msiof.c:792:28: warning: Signed integer overflow for expression '7<<29'. [integerOverflow] sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1); Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://msgid.link/r/20240130094053.10672-1-wsa+renesas@sang-engineering.com Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index cfc3b1ddbd229..6f12e4fb2e2e1 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -136,14 +136,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) -- GitLab From f1fea725cc93fcc3c5af9a2af63ffdc40dd2259e Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 20 Dec 2023 10:11:51 -0500 Subject: [PATCH 0347/1405] selftests/livepatch: fix and refactor new dmesg message code The livepatching kselftests rely on comparing expected vs. observed dmesg output. After each test, new dmesg entries are determined by the 'comm' utility comparing a saved, pre-test copy of dmesg to post-test dmesg output. Alexander reports that the 'comm --nocheck-order -13' invocation used by the tests can be confused when dmesg entry timestamps vary in magnitude (ie, "[ 98.820331]" vs. "[ 100.031067]"), in which case, additional messages are reported as new. The unexpected entries then spoil the test results. Instead of relying on 'comm' or 'diff' to determine new testing dmesg entries, refactor the code: - pre-test : log a unique canary dmesg entry - test : run tests, log messages - post-test : filter dmesg starting from pre-test message Reported-by: Alexander Gordeev Closes: https://lore.kernel.org/live-patching/ZYAimyPYhxVA9wKg@li-008a6a4c-3549-11b2-a85c-c5cc2836eea2.ibm.com/ Signed-off-by: Joe Lawrence Acked-by: Alexander Gordeev Tested-by: Marcos Paulo de Souza Reviewed-by: Marcos Paulo de Souza Acked-by: Miroslav Benes Signed-off-by: Shuah Khan --- .../testing/selftests/livepatch/functions.sh | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index c8416c54b4637..b1fd7362c2fee 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -42,17 +42,6 @@ function die() { exit 1 } -# save existing dmesg so we can detect new content -function save_dmesg() { - SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX) - dmesg > "$SAVED_DMESG" -} - -# cleanup temporary dmesg file from save_dmesg() -function cleanup_dmesg_file() { - rm -f "$SAVED_DMESG" -} - function push_config() { DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') @@ -99,7 +88,6 @@ function set_ftrace_enabled() { function cleanup() { pop_config - cleanup_dmesg_file } # setup_config - save the current config and set a script exit trap that @@ -280,7 +268,15 @@ function set_pre_patch_ret { function start_test { local test="$1" - save_dmesg + # Dump something unique into the dmesg log, then stash the entry + # in LAST_DMESG. The check_result() function will use it to + # find new kernel messages since the test started. + local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)" + log "$last_dmesg_msg" + loop_until 'dmesg | grep -q "$last_dmesg_msg"' || + die "buffer busy? can't find canary dmesg message: $last_dmesg_msg" + LAST_DMESG=$(dmesg | grep "$last_dmesg_msg") + echo -n "TEST: $test ... " log "===== TEST: $test =====" } @@ -291,23 +287,24 @@ function check_result { local expect="$*" local result - # Note: when comparing dmesg output, the kernel log timestamps - # help differentiate repeated testing runs. Remove them with a - # post-comparison sed filter. - - result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ + # Test results include any new dmesg entry since LAST_DMESG, then: + # - include lines matching keywords + # - exclude lines matching keywords + # - filter out dmesg timestamp prefixes + result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" + elif [[ "$result" == "" ]] ; then + echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n" + die "livepatch kselftest(s) failed" else echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" die "livepatch kselftest(s) failed" fi - - cleanup_dmesg_file } # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs -- GitLab From 5820cfee443f8a90ea3eb9f99f57f2049a4a93c3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:18 +0000 Subject: [PATCH 0348/1405] kselftest/seccomp: Use kselftest output functions for benchmark In preparation for trying to output the test results themselves in TAP format rework all the prints in the benchmark to use the kselftest output functions. The uses of system() all produce single line output so we can avoid having to deal with fully managing the child process and continue to use system() by simply printing an empty message before we invoke system(). We also leave one printf() used to complete a line of output in place. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558dee0..93168dd2c1e30 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", - finish.tv_sec, finish.tv_nsec, - start.tv_sec, start.tv_nsec, - i, (double)i / 1000000000.0); + ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i, (double)i / 1000000000.0); return i; } @@ -53,7 +53,7 @@ unsigned long long calibrate(void) pid_t pid, ret; int seconds = 15; - printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); + ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); samples = 0; pid = getpid(); @@ -102,14 +102,14 @@ long compare(const char *name_one, const char *name_eval, const char *name_two, { bool good; - printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, - (long long)one, name_eval, (long long)two); + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); if (one > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); return 1; } if (two > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); return 1; } @@ -145,12 +145,15 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); - printf("Running on:\n"); + ksft_print_msg("Running on:\n"); + ksft_print_msg(""); system("uname -a"); - printf("Current BPF sysctl settings:\n"); + ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); if (argc > 1) @@ -158,11 +161,11 @@ int main(int argc, char *argv[]) else samples = calibrate(); - printf("Benchmarking %llu syscalls...\n", samples); + ksft_print_msg("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid native: %llu ns\n", native); + ksft_print_msg("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); @@ -172,33 +175,33 @@ int main(int argc, char *argv[]) assert(ret == 0); bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); /* Second filter resulting in a bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); + ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); /* Fourth filter, can not be converted to bitmap because of filter 3 */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); /* Estimations */ #define ESTIMATE(fmt, var, what) do { \ var = (what); \ - printf("Estimated " fmt ": %llu ns\n", var); \ + ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ if (var > INT_MAX) \ goto more_samples; \ } while (0) @@ -218,7 +221,7 @@ int main(int argc, char *argv[]) ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, (filter2 - native - entry) / 4); - printf("Expectations:\n"); + ksft_print_msg("Expectations:\n"); ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); bits = compare("native", "≤", "1 filter", native, le, filter1); if (bits) @@ -230,7 +233,7 @@ int main(int argc, char *argv[]) bits = compare("1 bitmapped", "≈", "2 bitmapped", bitmap1 - native, approx, bitmap2 - native); if (bits) { - printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); + ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); goto out; } @@ -242,7 +245,7 @@ int main(int argc, char *argv[]) goto out; more_samples: - printf("Saw unexpected benchmark result. Try running again with more samples?\n"); + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); out: return 0; } -- GitLab From b54761f6e9773350c0d1fb8e1e5aacaba7769d0f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 13:00:19 +0000 Subject: [PATCH 0349/1405] kselftest/seccomp: Report each expectation we assert as a KTAP test The seccomp benchmark test makes a number of checks on the performance it measures and logs them to the output but does so in a custom format which none of the automated test runners understand meaning that the chances that anyone is paying attention are slim. Let's additionally log each result in KTAP format so that automated systems parsing the test output will see each comparison as a test case. The original logs are left in place since they provide the actual numbers for analysis. As part of this rework the flow for the main program so that when we skip tests we still log all the tests we skip, this is because the standard KTAP headers and footers include counts of the number of expected and run tests. Tested-by: Anders Roxell Acked-by: Kees Cook Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- .../selftests/seccomp/seccomp_benchmark.c | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 93168dd2c1e30..97b86980b768f 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -98,24 +98,36 @@ bool le(int i_one, int i_two) } long compare(const char *name_one, const char *name_eval, const char *name_two, - unsigned long long one, bool (*eval)(int, int), unsigned long long two) + unsigned long long one, bool (*eval)(int, int), unsigned long long two, + bool skip) { bool good; + if (skip) { + ksft_test_result_skip("%s %s %s\n", name_one, name_eval, + name_two); + return 0; + } + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, (long long)one, name_eval, (long long)two); if (one > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); - return 1; + good = false; + goto out; } if (two > INT_MAX) { ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); - return 1; + good = false; + goto out; } good = eval(one, two); printf("%s\n", good ? "✔️" : "❌"); +out: + ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two); + return good ? 0 : 1; } @@ -142,9 +154,13 @@ int main(int argc, char *argv[]) unsigned long long samples, calc; unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + bool skip = false; setbuf(stdout, NULL); + ksft_print_header(); + ksft_set_plan(7); + ksft_print_msg("Running on:\n"); ksft_print_msg(""); system("uname -a"); @@ -202,8 +218,10 @@ int main(int argc, char *argv[]) #define ESTIMATE(fmt, var, what) do { \ var = (what); \ ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ - if (var > INT_MAX) \ - goto more_samples; \ + if (var > INT_MAX) { \ + skip = true; \ + ret |= 1; \ + } \ } while (0) ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, @@ -222,30 +240,33 @@ int main(int argc, char *argv[]) (filter2 - native - entry) / 4); ksft_print_msg("Expectations:\n"); - ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); - bits = compare("native", "≤", "1 filter", native, le, filter1); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1, + skip); + bits = compare("native", "≤", "1 filter", native, le, filter1, + skip); if (bits) - goto more_samples; + skip = true; ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", - per_filter1, approx, per_filter2); + per_filter1, approx, per_filter2, skip); bits = compare("1 bitmapped", "≈", "2 bitmapped", - bitmap1 - native, approx, bitmap2 - native); + bitmap1 - native, approx, bitmap2 - native, skip); if (bits) { ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); - goto out; + skip = true; } - ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); - ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, + bitmap1 - native, skip); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, + bitmap2 - native, skip); ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", - entry + (per_filter1 * 4) + native, approx, filter2); - if (ret == 0) - goto out; + entry + (per_filter1 * 4) + native, approx, filter2, + skip); -more_samples: - ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); -out: - return 0; + if (ret) + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); + + ksft_finished(); } -- GitLab From 8b1d72395635af45410b66cc4c4ab37a12c4a831 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 20 Jan 2024 15:29:27 +0100 Subject: [PATCH 0350/1405] parisc: Fix random data corruption from exception handler The current exception handler implementation, which assists when accessing user space memory, may exhibit random data corruption if the compiler decides to use a different register than the specified register %r29 (defined in ASM_EXCEPTIONTABLE_REG) for the error code. If the compiler choose another register, the fault handler will nevertheless store -EFAULT into %r29 and thus trash whatever this register is used for. Looking at the assembly I found that this happens sometimes in emulate_ldd(). To solve the issue, the easiest solution would be if it somehow is possible to tell the fault handler which register is used to hold the error code. Using %0 or %1 in the inline assembly is not posssible as it will show up as e.g. %r29 (with the "%r" prefix), which the GNU assembler can not convert to an integer. This patch takes another, better and more flexible approach: We extend the __ex_table (which is out of the execution path) by one 32-word. In this word we tell the compiler to insert the assembler instruction "or %r0,%r0,%reg", where %reg references the register which the compiler choosed for the error return code. In case of an access failure, the fault handler finds the __ex_table entry and can examine the opcode. The used register is encoded in the lowest 5 bits, and the fault handler can then store -EFAULT into this register. Since we extend the __ex_table to 3 words we can't use the BUILDTIME_TABLE_SORT config option any longer. Signed-off-by: Helge Deller Cc: # v6.0+ --- arch/parisc/Kconfig | 1 - arch/parisc/include/asm/assembly.h | 1 + arch/parisc/include/asm/extable.h | 64 +++++++++++++++++++++++++ arch/parisc/include/asm/special_insns.h | 6 ++- arch/parisc/include/asm/uaccess.h | 48 +++---------------- arch/parisc/kernel/cache.c | 4 +- arch/parisc/kernel/unaligned.c | 44 ++++++++--------- arch/parisc/mm/fault.c | 11 +++-- 8 files changed, 108 insertions(+), 71 deletions(-) create mode 100644 arch/parisc/include/asm/extable.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index d14ccc948a29b..5c845e8d59d92 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -25,7 +25,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_KERNEL_UNCOMPRESSED select HAVE_PCI select HAVE_PERF_EVENTS diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759da..5937d5edaba1e 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 0000000000000..4ea23e3d79dc9 --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include +#include + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c6..51f40eaf77806 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9e..88d0ae5769dde 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include #include +#include #include #include @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0c015487e5db6..5552602fcaef1 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -854,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fic,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (dcache_stride), "i" (SR_USER)); } @@ -869,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fdc,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (icache_stride), "i" (SR_USER)); } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ce25acfe4889d..c520e551a1652 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%2)\n" "5: stw %R1,8(%%sr1,%2)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 2fe5b44986e09..c39de84e98b05 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { -- GitLab From a22fe1d6dec7e98535b97249fdc95c2be79120bb Mon Sep 17 00:00:00 2001 From: Frank Li Date: Tue, 23 Jan 2024 12:28:41 -0500 Subject: [PATCH 0351/1405] dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV is_slave_direction() should return true when direction is DMA_DEV_TO_DEV. Fixes: 49920bc66984 ("dmaengine: add new enum dma_transfer_direction") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240123172842.3764529-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 3df70d6131c8f..752dbde4cec1f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -953,7 +953,8 @@ static inline int dmaengine_slave_config(struct dma_chan *chan, static inline bool is_slave_direction(enum dma_transfer_direction direction) { - return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM); + return (direction == DMA_MEM_TO_DEV) || (direction == DMA_DEV_TO_MEM) || + (direction == DMA_DEV_TO_DEV); } static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( -- GitLab From 7104ba0f1958adb250319e68a15eff89ec4fd36d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 28 Jan 2024 14:05:54 +0200 Subject: [PATCH 0352/1405] phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP If the external phy working together with phy-omap-usb2 does not implement send_srp(), we may still attempt to call it. This can happen on an idle Ethernet gadget triggering a wakeup for example: configfs-gadget.g1 gadget.0: ECM Suspend configfs-gadget.g1 gadget.0: Port suspended. Triggering wakeup ... Unable to handle kernel NULL pointer dereference at virtual address 00000000 when execute ... PC is at 0x0 LR is at musb_gadget_wakeup+0x1d4/0x254 [musb_hdrc] ... musb_gadget_wakeup [musb_hdrc] from usb_gadget_wakeup+0x1c/0x3c [udc_core] usb_gadget_wakeup [udc_core] from eth_start_xmit+0x3b0/0x3d4 [u_ether] eth_start_xmit [u_ether] from dev_hard_start_xmit+0x94/0x24c dev_hard_start_xmit from sch_direct_xmit+0x104/0x2e4 sch_direct_xmit from __dev_queue_xmit+0x334/0xd88 __dev_queue_xmit from arp_solicit+0xf0/0x268 arp_solicit from neigh_probe+0x54/0x7c neigh_probe from __neigh_event_send+0x22c/0x47c __neigh_event_send from neigh_resolve_output+0x14c/0x1c0 neigh_resolve_output from ip_finish_output2+0x1c8/0x628 ip_finish_output2 from ip_send_skb+0x40/0xd8 ip_send_skb from udp_send_skb+0x124/0x340 udp_send_skb from udp_sendmsg+0x780/0x984 udp_sendmsg from __sys_sendto+0xd8/0x158 __sys_sendto from ret_fast_syscall+0x0/0x58 Let's fix the issue by checking for send_srp() and set_vbus() before calling them. For USB peripheral only cases these both could be NULL. Fixes: 657b306a7bdf ("usb: phy: add a new driver for omap usb2 phy") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20240128120556.8848-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index dd2913ac0fa28..78e19b128962a 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -117,7 +117,7 @@ static int omap_usb_set_vbus(struct usb_otg *otg, bool enabled) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->set_vbus) return -ENODEV; return phy->comparator->set_vbus(phy->comparator, enabled); @@ -127,7 +127,7 @@ static int omap_usb_start_srp(struct usb_otg *otg) { struct omap_usb *phy = phy_to_omapusb(otg->usb_phy); - if (!phy->comparator) + if (!phy->comparator || !phy->comparator->start_srp) return -ENODEV; return phy->comparator->start_srp(phy->comparator); -- GitLab From bd504bcfec41a503b32054da5472904b404341a4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:57:56 +0100 Subject: [PATCH 0353/1405] dm: limit the number of targets and parameter size area The kvmalloc function fails with a warning if the size is larger than INT_MAX. The warning was triggered by a syscall testing robot. In order to avoid the warning, this commit limits the number of targets to 1048576 and the size of the parameter area to 1073741824. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-core.h | 2 ++ drivers/md/dm-ioctl.c | 3 ++- drivers/md/dm-table.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 095b9b49aa825..e6757a30dccad 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -22,6 +22,8 @@ #include "dm-ima.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_io; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e65058e0ed06a..3b1ad7127cb84 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1941,7 +1941,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; - if (param_kernel->data_size < minimum_data_size) { + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) { DMERR("Invalid data size in the ioctl structure: %u", param_kernel->data_size); return -EINVAL; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 260b5b8f2b0d7..41f1d731ae5ac 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -129,7 +129,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, blk_mode_t mode, unsigned int num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -144,7 +149,7 @@ int dm_table_create(struct dm_table **result, blk_mode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { -- GitLab From 9cf11ce06ea52911245578032761e40a6409cf35 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 15:59:15 +0100 Subject: [PATCH 0354/1405] dm stats: limit the number of entries The kvmalloc function fails with a warning if the size is larger than INT_MAX. Linus said that there should be limits that prevent this warning from being hit. This commit adds the limits to the dm-stats subsystem in DM core. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index bdc14ec998141..1e5d988f44da6 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -66,6 +66,9 @@ struct dm_stats_last_position { unsigned int last_rw; }; +#define DM_STAT_MAX_ENTRIES 8388608 +#define DM_STAT_MAX_HISTOGRAM_ENTRIES 134217728 + /* * A typo on the command line could possibly make the kernel run out of memory * and crash. To prevent the crash we account all used memory. We fail if we @@ -285,6 +288,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1)) return -EOVERFLOW; + if (n_entries > DM_STAT_MAX_ENTRIES) + return -EOVERFLOW; + shared_alloc_size = struct_size(s, stat_shared, n_entries); if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries) return -EOVERFLOW; @@ -297,6 +303,9 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long)) return -EOVERFLOW; + if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES) + return -EOVERFLOW; + if (!check_shared_memory(shared_alloc_size + histogram_alloc_size + num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size))) return -ENOMEM; -- GitLab From 40ef8756fbdd9faec4da5d70352b28f1196132ed Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Jan 2024 16:00:57 +0100 Subject: [PATCH 0355/1405] dm writecache: allow allocations larger than 2GiB The function kvmalloc_node limits the allocation size to INT_MAX. This limit will be overflowed if dm-writecache attempts to map a device with 1TiB or larger length. This commit changes kvmalloc_array to vmalloc_array to avoid the limit. The commit also changes vmalloc(array_size()) to vmalloc_array(). Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 074cb785eafc1..b463c28c39ad3 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -299,7 +299,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) long i; wc->memory_map = NULL; - pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); + pages = vmalloc_array(p, sizeof(struct page *)); if (!pages) { r = -ENOMEM; goto err2; @@ -330,7 +330,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) r = -ENOMEM; goto err3; } - kvfree(pages); + vfree(pages); wc->memory_vmapped = true; } @@ -341,7 +341,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) return 0; err3: - kvfree(pages); + vfree(pages); err2: dax_read_unlock(id); err1: @@ -962,7 +962,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc) if (wc->entries) return 0; - wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); + wc->entries = vmalloc_array(wc->n_blocks, sizeof(struct wc_entry)); if (!wc->entries) return -ENOMEM; for (b = 0; b < wc->n_blocks; b++) { -- GitLab From 53ed2ac8fc1de6658aadae5714627ac99b9dddb0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 Jan 2024 11:34:49 -0800 Subject: [PATCH 0356/1405] soc: apple: mailbox: error pointers are negative integers In an entirely unrelated discussion where I pointed out a stupid thinko of mine, Rasmus piped up and noted that that obvious mistake already existed elsewhere in the kernel tree. An "error pointer" is the negative error value encoded as a pointer, making the whole "return error or valid pointer" use-case simple and straightforward. We use it all over the kernel. But the key here is that errors are _negative_ error numbers, not the horrid UNIX user-level model of "-1 and the value of 'errno'". The Apple mailbox driver used the positive error values, and thus just returned invalid normal pointers instead of actual errors. Of course, the reason nobody ever noticed is that the errors presumably never actually happen, so this is fixing a conceptual bug rather than an actual one. Reported-by: Rasmus Villemoes Link: https://lore.kernel.org/all/5c30afe0-f9fb-45d5-9333-dd914a1ea93a@prevas.dk/ Signed-off-by: Linus Torvalds --- drivers/soc/apple/mailbox.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c index 780199bf351ef..49a0955e82d6c 100644 --- a/drivers/soc/apple/mailbox.c +++ b/drivers/soc/apple/mailbox.c @@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index) of_node_put(args.np); if (!pdev) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); mbox = platform_get_drvdata(pdev); if (!mbox) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) - return ERR_PTR(ENODEV); + return ERR_PTR(-ENODEV); return mbox; } -- GitLab From 65612e993493014cb95be81ca4f98e08732c46d0 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 6 Dec 2023 18:02:47 +0100 Subject: [PATCH 0357/1405] KVM: selftests: x86_64: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20231206170241.82801-12-ajones@ventanamicro.com [sean: keep the newline in the "tsc\n" strncmp()] Signed-off-by: Sean Christopherson --- .../selftests/kvm/lib/x86_64/processor.c | 10 +++---- tools/testing/selftests/kvm/lib/x86_64/vmx.c | 6 ++-- tools/testing/selftests/kvm/x86_64/amx_test.c | 2 +- .../testing/selftests/kvm/x86_64/cpuid_test.c | 4 +-- .../selftests/kvm/x86_64/flds_emulation.h | 2 +- .../selftests/kvm/x86_64/hyperv_clock.c | 4 +-- .../testing/selftests/kvm/x86_64/hyperv_ipi.c | 2 +- .../selftests/kvm/x86_64/hyperv_tlb_flush.c | 2 +- .../selftests/kvm/x86_64/kvm_clock_test.c | 6 ++-- .../selftests/kvm/x86_64/platform_info_test.c | 2 +- .../kvm/x86_64/pmu_event_filter_test.c | 2 +- .../selftests/kvm/x86_64/sev_migrate_tests.c | 28 +++++++++---------- .../smaller_maxphyaddr_emulation_test.c | 4 +-- .../selftests/kvm/x86_64/sync_regs_test.c | 10 +++---- .../kvm/x86_64/ucna_injection_test.c | 8 +++--- .../selftests/kvm/x86_64/userspace_io_test.c | 2 +- .../kvm/x86_64/vmx_apic_access_test.c | 2 +- .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 16 +++++------ .../vmx_exception_with_invalid_guest_state.c | 2 +- .../selftests/kvm/x86_64/xapic_ipi_test.c | 8 +++--- .../selftests/kvm/x86_64/xcr0_cpuid_test.c | 2 +- .../selftests/kvm/x86_64/xss_msr_test.c | 2 +- 22 files changed, 63 insertions(+), 63 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d8288374078e4..4bc52948447d8 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -170,10 +170,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", + "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx\n", + "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; @@ -220,7 +220,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) /* Fill in page table entry. */ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); + "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); } @@ -253,7 +253,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) if (*pte & PTE_LARGE_MASK) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, - "Unexpected hugepage at level %d\n", current_level); + "Unexpected hugepage at level %d", current_level); *level = current_level; } @@ -825,7 +825,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) struct kvm_regs regs; TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 59d97531c9b17..089b8925b6b22 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -54,7 +54,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) /* KVM should return supported EVMCS version range */ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", + "Incorrect EVMCS version range: %x:%x", evmcs_ver & 0xff, evmcs_ver >> 8); return evmcs_ver; @@ -387,10 +387,10 @@ static void nested_create_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", + "Cannot create page table at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); } } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 11329e5ff945e..f7da543570ad8 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -296,7 +296,7 @@ int main(int argc, char *argv[]) void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ ret = memcmp(amx_start, tiles_data, TILE_SIZE); - TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); kvm_x86_state_cleanup(state); break; case 9: diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 3b34d8156d1c9..8c579ce714e9a 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -84,7 +84,7 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, TEST_ASSERT(e1->function == e2->function && e1->index == e2->index && e1->flags == e2->flags, - "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n", + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); @@ -170,7 +170,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, - "KVM didn't update nent on success, wanted %u, got %u\n", + "KVM didn't update nent on success, wanted %u, got %u", vcpu->cpuid->nent, cpuid->nent); for (i = 0; i < vcpu->cpuid->nent; i++) { diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h index 0a1573d52882b..37b1a9f528644 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) insn_bytes = run->emulation_failure.insn_bytes; TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", insn_bytes[0], insn_bytes[1]); vcpu_regs_get(vcpu, ®s); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index f5e1e98f04f9e..65690d916db7e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -220,7 +220,7 @@ int main(void) tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, - "TSC page has to be page aligned\n"); + "TSC page has to be page aligned"); vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); host_check_tsc_msr_rdtsc(vcpu); @@ -237,7 +237,7 @@ int main(void) break; case UCALL_DONE: /* Keep in sync with guest_main() */ - TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d", stage); goto out; default: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index 65e5f4c05068a..f1617762c22fe 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -289,7 +289,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index c4443f71f8dd0..05b56095cf76f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -658,7 +658,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_ABORT: diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 1778704360a66..3e0b7d51abdaa 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) break; } while (errno == EINTR); - TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r); + TEST_ASSERT(!r, "clock_gettime() failed: %d", r); data.realtime = ts.tv_sec * NSEC_PER_SEC; data.realtime += ts.tv_nsec; @@ -127,7 +127,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd); + TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); } } } @@ -154,7 +154,7 @@ static void check_clocksource(void) } clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n"); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); if (!fgets(clk_name, st.st_size, fp)) { pr_info("failed to read clocksource file: %d; assuming TSC.\n", diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index c9a07963d68aa..87011965dc416 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -44,7 +44,7 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) get_ucall(vcpu, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 283cc55597a4f..a3bd54b925aba 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -866,7 +866,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, * userspace doesn't set any pmu filter. */ count = run_vcpu_to_sync(vcpu); - TEST_ASSERT(count, "Unexpected count value: %ld\n", count); + TEST_ASSERT(count, "Unexpected count value: %ld", count); for (i = 0; i < BIT(nr_fixed_counters); i++) { bitmap = BIT(i); diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index c7ef97561038e..a49828adf2949 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -91,7 +91,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_migrate_from(dst, src); - TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); } static void test_sev_migrate_from(bool es) @@ -113,7 +113,7 @@ static void test_sev_migrate_from(bool es) /* Migrate the guest back to the original VM. */ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); TEST_ASSERT(ret == -1 && errno == EIO, - "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + "VM that was migrated from should be dead. ret %d, errno: %d", ret, errno); kvm_vm_free(src_vm); @@ -172,7 +172,7 @@ static void test_sev_migrate_parameters(void) vm_no_sev = aux_vm_create(true); ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d\n", ret, + "Migrations require SEV enabled. ret %d, errno: %d", ret, errno); if (!have_sev_es) @@ -187,25 +187,25 @@ static void test_sev_migrate_parameters(void) ret = __sev_migrate_from(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", + "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d", ret, errno); kvm_vm_free(sev_vm); @@ -227,7 +227,7 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_mirror_create(dst, src); - TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); } static void verify_mirror_allowed_cmds(int vm_fd) @@ -259,7 +259,7 @@ static void verify_mirror_allowed_cmds(int vm_fd) ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able call command: %d. ret: %d, errno: %d\n", + "Should not be able call command: %d. ret: %d, errno: %d", cmd_id, ret, errno); } @@ -301,18 +301,18 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to self. ret: %d, errno: %d\n", + "Should not be able copy context to self. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + "Copy context requires SEV enabled. ret %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_with_vcpu, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", ret, errno); if (!have_sev_es) @@ -322,13 +322,13 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); kvm_vm_free(sev_es_vm); diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 06edf00a97d61..1a46dd7bb3913 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) MEM_REGION_SIZE / PAGE_SIZE, 0); gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; default: - TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); } kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 00965ba33f730..a91b5b145fa35 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -46,7 +46,7 @@ static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) REG_COMPARE(rax); REG_COMPARE(rbx); @@ -230,14 +230,14 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; @@ -245,14 +245,14 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index 0ed32ec903d03..dcbb3c29fb8e9 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -143,7 +143,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); printf("vCPU received GP in guest.\n"); } @@ -188,7 +188,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); @@ -198,7 +198,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); @@ -208,7 +208,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, "vCPU assertion failure: %s.\n", + TEST_ASSERT(false, "vCPU assertion failure: %s.", (const char *)uc.args[0]); } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 255c50b0dc326..9481cbcf284f6 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) break; TEST_ASSERT(run->io.port == 0x80, - "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + "Expected I/O at port 0x80, got port 0x%x", run->io.port); /* * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index 2bed5fb3a0d6e..a81a24761aac0 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", run->internal.suberror); break; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index e4ad5fef52ffc..7f6f5f23fb9b6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -128,17 +128,17 @@ int main(int argc, char *argv[]) */ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); } - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index a9b827c69f32c..fad3634fd9eb6 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Expected emulation failure, got %d\n", + "Expected emulation failure, got %d", run->emulation_failure.suberror); } diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 67ac2a3292efd..725c206ba0b92 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -216,7 +216,7 @@ static void *vcpu_thread(void *arg) "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" "Halter TPR=%#x PPR=%#x LVR=%#x\n" "Migrations attempted: %lu\n" - "Migrations completed: %lu\n", + "Migrations completed: %lu", vcpu->id, (const char *)uc.args[0], params->data->ipis_sent, params->data->hlt_count, params->data->wake_count, @@ -288,7 +288,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, } TEST_ASSERT(nodes > 1, - "Did not find at least 2 numa nodes. Can't do migration\n"); + "Did not find at least 2 numa nodes. Can't do migration"); fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); @@ -347,7 +347,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, wake_count != data->wake_count, "IPI, HLT and wake count have not increased " "in the last %lu seconds. " - "HLTer is likely hung.\n", interval_secs); + "HLTer is likely hung.", interval_secs); ipis_sent = data->ipis_sent; hlt_count = data->hlt_count; @@ -381,7 +381,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs, "-m adds calls to migrate_pages while vCPUs are running." " Default is no migrations.\n" "-d - delay between migrate_pages() calls." - " Default is %d microseconds.\n", + " Default is %d microseconds.", DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); } } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index dc6217440db3a..25a0b0db5c3c9 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index e0ddf47362e77..167c97abff1b8 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); TEST_ASSERT(xss_val == 0, - "MSR_IA32_XSS should be initialized to zero\n"); + "MSR_IA32_XSS should be initialized to zero"); vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); -- GitLab From 46fee9e38995af9ae16a8cc7d05031486d44cf35 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 9 Jan 2024 14:03:02 -0800 Subject: [PATCH 0358/1405] KVM: selftests: Delete superfluous, unused "stage" variable in AMX test Delete the AMX's tests "stage" counter, as the counter is no longer used, which makes clang unhappy: x86_64/amx_test.c:224:6: error: variable 'stage' set but not used int stage, ret; ^ 1 error generated. Note, "stage" was never really used, it just happened to be dumped out by a (failed) assertion on run->exit_reason, i.e. the AMX test has no concept of stages, the code was likely copy+pasted from a different test. Fixes: c96f57b08012 ("KVM: selftests: Make vCPU exit reason test assertion common") Reviewed-by: Jim Mattson Link: https://lore.kernel.org/r/20240109220302.399296-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/amx_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index f7da543570ad8..eae521f050e09 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) vm_vaddr_t amx_cfg, tiledata, xstate; struct ucall uc; u32 amx_offset; - int stage, ret; + int ret; /* * Note, all off-by-default features must be enabled before anything @@ -263,7 +263,7 @@ int main(int argc, char *argv[]) memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); - for (stage = 1; ; stage++) { + for (;;) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); -- GitLab From 8ad4855273488c9bd5320b3fee80f66f0023f326 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 29 Jan 2024 09:58:46 +0100 Subject: [PATCH 0359/1405] KVM: selftests: Avoid infinite loop in hyperv_features when invtsc is missing When X86_FEATURE_INVTSC is missing, guest_test_msrs_access() was supposed to skip testing dependent Hyper-V invariant TSC feature. Unfortunately, 'continue' does not lead to that as stage is not incremented. Moreover, 'vm' allocated with vm_create_with_one_vcpu() is not freed and the test runs out of available file descriptors very quickly. Fixes: bd827bd77537 ("KVM: selftests: Test Hyper-V invariant TSC control") Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240129085847.2674082-1-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 4f4193fc74ffa..b923a285e96f9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -454,7 +454,7 @@ static void guest_test_msrs_access(void) case 44: /* MSR is not available when CPUID feature bit is unset */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; msr->fault_expected = true; @@ -462,7 +462,7 @@ static void guest_test_msrs_access(void) case 45: /* MSR is vailable when CPUID feature bit is set */ if (!has_invtsc) - continue; + goto next_stage; vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; @@ -471,7 +471,7 @@ static void guest_test_msrs_access(void) case 46: /* Writing bits other than 0 is forbidden */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 0xdeadbeef; @@ -480,7 +480,7 @@ static void guest_test_msrs_access(void) case 47: /* Setting bit 0 enables the feature */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 1; @@ -513,6 +513,7 @@ static void guest_test_msrs_access(void) return; } +next_stage: stage++; kvm_vm_free(vm); } -- GitLab From c2a449a30fa2b5ddc1dd058e92e047157c9eeb9e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 29 Jan 2024 09:58:47 +0100 Subject: [PATCH 0360/1405] KVM: selftests: Fail tests when open() fails with !ENOENT open_path_or_exit() is used for '/dev/kvm', '/dev/sev', and '/sys/module/%s/parameters/%s' and skipping test when the entry is missing is completely reasonable. Other errors, however, may indicate a real issue which is easy to miss. E.g. when 'hyperv_features' test was entering an infinite loop the output was: ./hyperv_features Testing access to Hyper-V specific MSRs 1..0 # SKIP - /dev/kvm not available (errno: 24) and this can easily get overlooked. Keep ENOENT case 'special' for skipping tests and fail when open() results in any other errno. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240129085847.2674082-2-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8754abaff5e91..1b197426f29fc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -27,7 +27,8 @@ int open_path_or_exit(const char *path, int flags) int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); + __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); + TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); return fd; } -- GitLab From 2f77465b05b1270c832b5e2ee27037672ad2a10a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 30 Jan 2024 19:01:35 +0100 Subject: [PATCH 0361/1405] x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR The EFI stub's kernel placement logic randomizes the physical placement of the kernel by taking all available memory into account, and picking a region at random, based on a random seed. When KASLR is disabled, this seed is set to 0x0, and this results in the lowest available region of memory to be selected for loading the kernel, even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is typically reserved for the GFP_DMA region, to accommodate masters that can only access the first 16 MiB of system memory. Even if such devices are rare these days, we may still end up with a warning in the kernel log, as reported by Tom: swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0 Fix this by tweaking the random allocation logic to accept a low bound on the placement, and set it to LOAD_PHYSICAL_ADDR. Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") Reported-by: Tom Englund Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404 Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efistub.h | 3 ++- drivers/firmware/efi/libstub/kaslr.c | 2 +- drivers/firmware/efi/libstub/randomalloc.c | 12 +++++++----- drivers/firmware/efi/libstub/x86-stub.c | 1 + drivers/firmware/efi/libstub/zboot.c | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 212687c30d79c..c04b82ea40f21 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -956,7 +956,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed, - int memory_type, unsigned long alloc_limit); + int memory_type, unsigned long alloc_min, + unsigned long alloc_max); efi_status_t efi_random_get_seed(void); diff --git a/drivers/firmware/efi/libstub/kaslr.c b/drivers/firmware/efi/libstub/kaslr.c index 62d63f7a2645b..1a9808012abd3 100644 --- a/drivers/firmware/efi/libstub/kaslr.c +++ b/drivers/firmware/efi/libstub/kaslr.c @@ -119,7 +119,7 @@ efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed, - EFI_LOADER_CODE, EFI_ALLOC_LIMIT); + EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 674a064b8f7ad..4e96a855fdf47 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -17,7 +17,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, unsigned long size, unsigned long align_shift, - u64 alloc_limit) + u64 alloc_min, u64 alloc_max) { unsigned long align = 1UL << align_shift; u64 first_slot, last_slot, region_end; @@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, return 0; region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, - alloc_limit); + alloc_max); if (region_end < size) return 0; - first_slot = round_up(md->phys_addr, align); + first_slot = round_up(max(md->phys_addr, alloc_min), align); last_slot = round_down(region_end - size + 1, align); if (first_slot > last_slot) @@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long *addr, unsigned long random_seed, int memory_type, - unsigned long alloc_limit) + unsigned long alloc_min, + unsigned long alloc_max) { unsigned long total_slots = 0, target_slot; unsigned long total_mirrored_slots = 0; @@ -78,7 +79,8 @@ efi_status_t efi_random_alloc(unsigned long size, efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; - slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); + slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, + alloc_max); MD_NUM_SLOTS(md) = slots; total_slots += slots; if (md->attribute & EFI_MEMORY_MORE_RELIABLE) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index cb0be88c81317..99429bc4b0c7e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -799,6 +799,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, seed[0], EFI_LOADER_CODE, + LOAD_PHYSICAL_ADDR, EFI_X86_KERNEL_ALLOC_LIMIT); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c index bdb17eac0cb40..1ceace9567586 100644 --- a/drivers/firmware/efi/libstub/zboot.c +++ b/drivers/firmware/efi/libstub/zboot.c @@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab) } status = efi_random_alloc(alloc_size, min_kimg_align, &image_base, - seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT); + seed, EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); if (status != EFI_SUCCESS) { efi_err("Failed to allocate memory\n"); goto free_cmdline; -- GitLab From aa0e784dea7c1a026aabff9db1cb5d2bd92b3e92 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 19 Jan 2024 18:06:35 +0800 Subject: [PATCH 0362/1405] efi/libstub: Add one kernel-doc comment Add the description of @memory_type to silence the warning: drivers/firmware/efi/libstub/alignedmem.c:27: warning: Function parameter or struct member 'memory_type' not described in 'efi_allocate_pages_aligned' Signed-off-by: Yang Li [ardb: tweak comment] Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/alignedmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/libstub/alignedmem.c b/drivers/firmware/efi/libstub/alignedmem.c index 6b83c492c3b82..31928bd87e0ff 100644 --- a/drivers/firmware/efi/libstub/alignedmem.c +++ b/drivers/firmware/efi/libstub/alignedmem.c @@ -14,6 +14,7 @@ * @max: the address that the last allocated memory page shall not * exceed * @align: minimum alignment of the base of the allocation + * @memory_type: the type of memory to allocate * * Allocate pages as EFI_LOADER_DATA. The allocated pages are aligned according * to @align, which should be >= EFI_ALLOC_ALIGN. The last allocated page will -- GitLab From 10c02aad111df02088d1a81792a709f6a7eca6cc Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Wed, 24 Jan 2024 09:10:28 +0000 Subject: [PATCH 0363/1405] KVM: arm64: Fix circular locking dependency The rule inside kvm enforces that the vcpu->mutex is taken *inside* kvm->lock. The rule is violated by the pkvm_create_hyp_vm() which acquires the kvm->lock while already holding the vcpu->mutex lock from kvm_vcpu_ioctl(). Avoid the circular locking dependency altogether by protecting the hyp vm handle with the config_lock, much like we already do for other forms of VM-scoped data. Signed-off-by: Sebastian Ene Cc: stable@vger.kernel.org Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240124091027.1477174-2-sebastianene@google.com --- arch/arm64/kvm/pkvm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 8350fb8fee0b9..b7be96a535973 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void) hyp_mem_base); } +static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) +{ + if (host_kvm->arch.pkvm.handle) { + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, + host_kvm->arch.pkvm.handle)); + } + + host_kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); +} + /* * Allocates and donates memory for hypervisor VM structs at EL2. * @@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) return 0; destroy_vm: - pkvm_destroy_hyp_vm(host_kvm); + __pkvm_destroy_hyp_vm(host_kvm); return ret; free_vm: free_pages_exact(hyp_vm, hyp_vm_sz); @@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm) { int ret = 0; - mutex_lock(&host_kvm->lock); + mutex_lock(&host_kvm->arch.config_lock); if (!host_kvm->arch.pkvm.handle) ret = __pkvm_create_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->lock); + mutex_unlock(&host_kvm->arch.config_lock); return ret; } void pkvm_destroy_hyp_vm(struct kvm *host_kvm) { - if (host_kvm->arch.pkvm.handle) { - WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, - host_kvm->arch.pkvm.handle)); - } - - host_kvm->arch.pkvm.handle = 0; - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); + mutex_lock(&host_kvm->arch.config_lock); + __pkvm_destroy_hyp_vm(host_kvm); + mutex_unlock(&host_kvm->arch.config_lock); } int pkvm_init_host_vm(struct kvm *host_kvm) -- GitLab From 5a287d3d2b9de2b3e747132c615599907ba5c3c1 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 19:45:31 +0100 Subject: [PATCH 0364/1405] lsm: fix default return value of the socket_getpeersec_*() hooks For these hooks the true "neutral" value is -EOPNOTSUPP, which is currently what is returned when no LSM provides this hook and what LSMs return when there is no security context set on the socket. Correct the value in and adjust the dispatch functions in security/security.c to avoid issues when the BPF LSM is enabled. Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 4 ++-- security/security.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 185924c563787..76458b6d53da7 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -315,9 +315,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) diff --git a/security/security.c b/security/security.c index 6196ccaba433a..3aaad75c9ce85 100644 --- a/security/security.c +++ b/security/security.c @@ -4624,8 +4624,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { - return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, + list) { + rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, + len); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_stream); } /** @@ -4645,8 +4657,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, - skb, secid); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, + list) { + rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_dgram); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); -- GitLab From 80441f76ee67002437db61f3b317ed80cce085d2 Mon Sep 17 00:00:00 2001 From: Brenton Simpson Date: Tue, 30 Jan 2024 13:34:16 -0800 Subject: [PATCH 0365/1405] Input: xpad - add Lenovo Legion Go controllers The Lenovo Legion Go is a handheld gaming system, similar to a Steam Deck. It has a gamepad (including rear paddles), 3 gyroscopes, a trackpad, volume buttons, a power button, and 2 LED ring lights. The Legion Go firmware presents these controls as a USB hub with various devices attached. In its default state, the gamepad is presented as an Xbox controller connected to this hub. (By holding a combination of buttons, it can be changed to use the older DirectInput API.) This patch teaches the existing Xbox controller module `xpad` to bind to the controller in the Legion Go, which enables support for the: - directional pad, - analog sticks (including clicks), - X, Y, A, B, - start and select (or menu and capture), - shoulder buttons, and - rumble. The trackpad, touchscreen, volume controls, and power button are already supported via existing kernel modules. Two of the face buttons, the gyroscopes, rear paddles, and LEDs are not. After this patch lands, the Legion Go will be mostly functional in Linux, out-of-the-box. The various components of the USB hub can be synthesized into a single logical controller (including the additional buttons) in userspace with [Handheld Daemon](https://github.com/hhd-dev/hhd), which makes the Go fully functional. Signed-off-by: Brenton Simpson Link: https://lore.kernel.org/r/20240118183546.418064-1-appsforartists@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index b1244d7df6cc9..7c4b2a5cc1b54 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -294,6 +294,7 @@ static const struct xpad_device { { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -491,6 +492,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ + XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ -- GitLab From 98323e9d70172f1b46d1cadb20d6c54abf62870d Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 17 Jan 2024 20:05:45 +0100 Subject: [PATCH 0366/1405] topology: Set capacity_freq_ref in all cases If "capacity-dmips-mhz" is not set, raw_capacity is null and we skip the normalization step which includes setting per_cpu capacity_freq_ref. Always register the notifier but skip the capacity normalization if raw_capacity is null. Fixes: 9942cb22ea45 ("sched/topology: Add a new arch_scale_freq_ref() method") Signed-off-by: Vincent Guittot Acked-by: Sudeep Holla Tested-by: Pierre Gondois Tested-by: Mark Brown Tested-by: Paul Barker Reviewed-by: Dietmar Eggemann Tested-by: Dietmar Eggemann Link: https://lore.kernel.org/r/20240117190545.596057-1-vincent.guittot@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 018ac202de345..024b78a0cfc11 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -431,9 +431,6 @@ init_cpu_capacity_callback(struct notifier_block *nb, struct cpufreq_policy *policy = data; int cpu; - if (!raw_capacity) - return 0; - if (val != CPUFREQ_CREATE_POLICY) return 0; @@ -450,9 +447,11 @@ init_cpu_capacity_callback(struct notifier_block *nb, } if (cpumask_empty(cpus_to_visit)) { - topology_normalize_cpu_scale(); - schedule_work(&update_topology_flags_work); - free_raw_capacity(); + if (raw_capacity) { + topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); + free_raw_capacity(); + } pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); } @@ -472,7 +471,7 @@ static int __init register_cpufreq_notifier(void) * On ACPI-based systems skip registering cpufreq notifier as cpufreq * information is not needed for cpu capacity initialization. */ - if (!acpi_disabled || !raw_capacity) + if (!acpi_disabled) return -EINVAL; if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) -- GitLab From a4e61de63e34860c36a71d1a364edba16fb6203b Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Mon, 8 Jan 2024 17:18:33 +0530 Subject: [PATCH 0367/1405] misc: fastrpc: Mark all sessions as invalid in cb_remove In remoteproc shutdown sequence, rpmsg_remove will get called which would depopulate all the child nodes that have been created during rpmsg_probe. This would result in cb_remove call for all the context banks for the remoteproc. In cb_remove function, session 0 is getting skipped which is not correct as session 0 will never become available again. Add changes to mark session 0 also as invalid. Fixes: f6f9279f2bf0 ("misc: fastrpc: Add Qualcomm fastrpc basic driver model") Cc: stable Signed-off-by: Ekansh Gupta Link: https://lore.kernel.org/r/20240108114833.20480-1-quic_ekangupt@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 1c6c62a7f7f55..03319a1fa97fd 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2191,7 +2191,7 @@ static int fastrpc_cb_remove(struct platform_device *pdev) int i; spin_lock_irqsave(&cctx->lock, flags); - for (i = 1; i < FASTRPC_MAX_SESSIONS; i++) { + for (i = 0; i < FASTRPC_MAX_SESSIONS; i++) { if (cctx->session[i].sid == sess->sid) { cctx->session[i].valid = false; cctx->sesscount--; -- GitLab From ac9762a74c7ca7cbfcb4c65f5871373653a046ac Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 26 Jan 2024 15:24:10 +0000 Subject: [PATCH 0368/1405] misc: open-dice: Fix spurious lockdep warning When probing the open-dice driver with PROVE_LOCKING=y, lockdep complains that the mutex in 'drvdata->lock' has a non-static key: | INFO: trying to register non-static key. | The code is fine but needs lockdep annotation, or maybe | you didn't initialize this object before use? | turning off the locking correctness validator. Fix the problem by initialising the mutex memory with mutex_init() instead of __MUTEX_INITIALIZER(). Cc: Arnd Bergmann Cc: David Brazdil Cc: Greg Kroah-Hartman Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20240126152410.10148-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/open-dice.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c index 8aea2d070a40c..d279a4f195e2a 100644 --- a/drivers/misc/open-dice.c +++ b/drivers/misc/open-dice.c @@ -140,7 +140,6 @@ static int __init open_dice_probe(struct platform_device *pdev) return -ENOMEM; *drvdata = (struct open_dice_drvdata){ - .lock = __MUTEX_INITIALIZER(drvdata->lock), .rmem = rmem, .misc = (struct miscdevice){ .parent = dev, @@ -150,6 +149,7 @@ static int __init open_dice_probe(struct platform_device *pdev) .mode = 0600, }, }; + mutex_init(&drvdata->lock); /* Index overflow check not needed, misc_register() will fail. */ snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); -- GitLab From b40f873a7c80dbafbb6f4a7a569f2dcaf969d283 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Jan 2024 14:37:03 +0200 Subject: [PATCH 0369/1405] selftests: net: Add missing matchall classifier One of the test cases in the test_bridge_backup_port.sh selftest relies on a matchall classifier to drop unrelated traffic so that the Tx drop counter on the VXLAN device will only be incremented as a result of traffic generated by the test. However, the configuration option for the matchall classifier is missing from the configuration file which might explain the failures we see in the netdev CI [1]. Fix by adding CONFIG_NET_CLS_MATCHALL to the configuration file. [1] # Backup nexthop ID - invalid IDs # ------------------------------- [...] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] # TEST: IPv6 address family nexthop as backup nexthop [ OK ] # TEST: No forwarding out of swp1 [ OK ] # TEST: Forwarding out of vx0 [ OK ] # TEST: No forwarding using backup nexthop ID [ OK ] # TEST: Tx drop increased [FAIL] [...] Fixes: b408453053fb ("selftests: net: Add bridge backup port and backup nexthop ID test") Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240129123703.1857843-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index d4b38177ed04a..3d908b52f22f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -52,6 +52,7 @@ CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m -- GitLab From f5c3eb4b7251baba5cd72c9e93920e710ac8194a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 27 Jan 2024 18:50:32 +0100 Subject: [PATCH 0370/1405] bridge: mcast: fix disabled snooping after long uptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original idea of the delay_time check was to not apply multicast snooping too early when an MLD querier appears. And to instead wait at least for MLD reports to arrive before switching from flooding to group based, MLD snooped forwarding, to avoid temporary packet loss. However in a batman-adv mesh network it was noticed that after 248 days of uptime 32bit MIPS based devices would start to signal that they had stopped applying multicast snooping due to missing queriers - even though they were the elected querier and still sending MLD queries themselves. While time_is_before_jiffies() generally is safe against jiffies wrap-arounds, like the code comments in jiffies.h explain, it won't be able to track a difference larger than ULONG_MAX/2. With a 32bit large jiffies and one jiffies tick every 10ms (CONFIG_HZ=100) on these MIPS devices running OpenWrt this would result in a difference larger than ULONG_MAX/2 after 248 (= 2^32/100/60/60/24/2) days and time_is_before_jiffies() would then start to return false instead of true. Leading to multicast snooping not being applied to multicast packets anymore. Fix this issue by using a proper timer_list object which won't have this ULONG_MAX/2 difference limitation. Fixes: b00589af3b04 ("bridge: disable snooping if there is no querier") Signed-off-by: Linus Lüssing Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240127175033.9640-1-linus.luessing@c0d3.blue Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 20 +++++++++++++++----- net/bridge/br_private.h | 4 ++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d7d021af10298..2d7b732429588 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b0a92c344722b..86ea5e6689b5c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -78,7 +78,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } -- GitLab From 1a89e24f8bfd3e3562d69709c9d9cd185ded869b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 29 Jan 2024 21:10:59 +0200 Subject: [PATCH 0371/1405] devlink: Fix referring to hw_addr attribute during state validation When port function state change is requested, and when the driver does not support it, it refers to the hw address attribute instead of state attribute. Seems like a copy paste error. Fix it by referring to the port function state attribute. Fixes: c0bea69d1ca7 ("devlink: Validate port function request") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240129191059.129030-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 62e54e152ecf1..78592912f657c 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, return -EOPNOTSUPP; } if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { - NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE], "Function does not support state setting"); return -EOPNOTSUPP; } -- GitLab From f884a9f9e59206a2d41f265e7e403f080d10b493 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 10 Jan 2024 17:48:44 +0100 Subject: [PATCH 0372/1405] btrfs: send: return EOPNOTSUPP on unknown flags When some ioctl flags are checked we return EOPNOTSUPP, like for BTRFS_SCRUB_SUPPORTED_FLAGS, BTRFS_SUBVOL_CREATE_ARGS_MASK or fallocate modes. The EINVAL is supposed to be for a supported but invalid values or combination of options. Fix that when checking send flags so it's consistent with the rest. CC: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/linux-btrfs/CAL3q7H5rryOLzp3EKq8RTbjMHMHeaJubfpsVLF6H4qJnKCUR1w@mail.gmail.com/ Reviewed-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2d7519a6ce72d..7902298c1f25b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -8111,7 +8111,7 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) } if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { - ret = -EINVAL; + ret = -EOPNOTSUPP; goto out; } -- GitLab From 0c309d66dacddf8ce939b891d9ead4a8e21ad6f0 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:51:26 -0800 Subject: [PATCH 0373/1405] btrfs: forbid creating subvol qgroups Creating a qgroup 0/subvolid leads to various races and it isn't helpful, because you can't specify a subvol id when creating a subvol, so you can't be sure it will be the right one. Any requirements on the automatic subvol can be gratified by using a higher level qgroup and the inheritance parameters of subvol creation. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5d42319b43f2d..fb2323b323bf9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3806,6 +3806,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) goto out; } + if (sa->create && is_fstree(sa->qgroupid)) { + ret = -EINVAL; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); -- GitLab From a8df35619948bd8363d330c20a90c9a7fbff28c0 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 10 Jan 2024 17:30:00 -0800 Subject: [PATCH 0374/1405] btrfs: forbid deleting live subvol qgroup If a subvolume still exists, forbid deleting its qgroup 0/subvolid. This behavior generally leads to incorrect behavior in squotas and doesn't have a legitimate purpose. Fixes: cecbb533b5fc ("btrfs: record simple quota deltas in delayed refs") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 63b426cc77989..5470e1cdf10c5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1736,6 +1736,15 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) return ret; } +static bool qgroup_has_usage(struct btrfs_qgroup *qgroup) +{ + return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 || + qgroup->excl > 0 || qgroup->excl_cmpr > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 || + qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0); +} + int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -1755,6 +1764,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) goto out; } + if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) { + ret = -EBUSY; + goto out; + } + /* Check if there are no children of this qgroup */ if (!list_empty(&qgroup->members)) { ret = -EBUSY; -- GitLab From e03ee2fe873eb68c1f9ba5112fee70303ebf9dfb Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sat, 20 Jan 2024 19:41:28 +1030 Subject: [PATCH 0375/1405] btrfs: do not ASSERT() if the newly created subvolume already got read [BUG] There is a syzbot crash, triggered by the ASSERT() during subvolume creation: assertion failed: !anon_dev, in fs/btrfs/disk-io.c:1319 ------------[ cut here ]------------ kernel BUG at fs/btrfs/disk-io.c:1319! invalid opcode: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:btrfs_get_root_ref.part.0+0x9aa/0xa60 btrfs_get_new_fs_root+0xd3/0xf0 create_subvol+0xd02/0x1650 btrfs_mksubvol+0xe95/0x12b0 __btrfs_ioctl_snap_create+0x2f9/0x4f0 btrfs_ioctl_snap_create+0x16b/0x200 btrfs_ioctl+0x35f0/0x5cf0 __x64_sys_ioctl+0x19d/0x210 do_syscall_64+0x3f/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0x6b ---[ end trace 0000000000000000 ]--- [CAUSE] During create_subvol(), after inserting root item for the newly created subvolume, we would trigger btrfs_get_new_fs_root() to get the btrfs_root of that subvolume. The idea here is, we have preallocated an anonymous device number for the subvolume, thus we can assign it to the new subvolume. But there is really nothing preventing things like backref walk to read the new subvolume. If that happens before we call btrfs_get_new_fs_root(), the subvolume would be read out, with a new anonymous device number assigned already. In that case, we would trigger ASSERT(), as we really expect no one to read out that subvolume (which is not yet accessible from the fs). But things like backref walk is still possible to trigger the read on the subvolume. Thus our assumption on the ASSERT() is not correct in the first place. [FIX] Fix it by removing the ASSERT(), and just free the @anon_dev, reset it to 0, and continue. If the subvolume tree is read out by something else, it should have already get a new anon_dev assigned thus we only need to free the preallocated one. Reported-by: Chenyuan Yang Fixes: 2dfb1e43f57d ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c6907d533fe83..e71ef97d0a7ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1336,8 +1336,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev)) { + free_anon_bdev(anon_dev); + anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); -- GitLab From 4451e8e8415e4ef48cdc763d66855f8c25fda94c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 23 Jan 2024 12:08:18 -0600 Subject: [PATCH 0376/1405] pinctrl: amd: Add IRQF_ONESHOT to the interrupt request On some systems the interrupt is shared between GPIO controller and ACPI SCI. When the interrupt is shared with the ACPI SCI the flags need to be identical. This should fix the GPIO controller failing to work after commit 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI"). ``` [ 0.417335] genirq: Flags mismatch irq 9. 00000088 (pinctrl_amd) vs. 00002080 (acpi) [ 0.420073] amd_gpio: probe of AMDI0030:00 failed with error -16 ``` Cc: Rafael J. Wysocki Reported-by: Christian Heusel Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218407 Fixes: 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") Link: https://lore.kernel.org/linux-acpi/CAJZ5v0iRqUXeuKmC_+dAJtDBLWQ3x15n4gRH48y7MEaLoXF+UA@mail.gmail.com/T/#mc5506014141b61e472b24e095889535a04458083 Signed-off-by: Mario Limonciello Acked-by: Rafael J. Wysocki Tested-by: Christian Heusel Link: https://lore.kernel.org/r/20240123180818.3994-1-mario.limonciello@amd.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 03ecb3d1aaf60..49f89b70dcecb 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev) } ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler, - IRQF_SHARED, KBUILD_MODNAME, gpio_dev); + IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev); if (ret) goto out2; -- GitLab From 4d5e86a56615cc387d21c629f9af8fb0e958d350 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: [PATCH 0377/1405] RDMA/mlx5: Fix fortify source warning while accessing Eth segment ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/qp.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index df1d1b0a3ef72..9947feb7fb8a0 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bdc..f0e55bf3ec8b5 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; -- GitLab From 43fdbd140238d44e7e847232719fef7d20f9d326 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 28 Jan 2024 11:29:12 +0200 Subject: [PATCH 0378/1405] IB/mlx5: Don't expose debugfs entries for RRoCE general parameters if not supported debugfs entries for RRoCE general CC parameters must be exposed only when they are supported, otherwise when accessing them there may be a syndrome error in kernel log, for example: $ cat /sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp cat: '/sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp': Invalid argument $ dmesg mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1253): QUERY_CONG_PARAMS(0x824) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x325a82), err(-22) Fixes: 66fb1d5df6ac ("IB/mlx5: Extend debug control for CC parameters") Reviewed-by: Edward Srouji Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/e7ade70bad52b7468bdb1de4d41d5fad70c8b71c.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/cong.c | 6 ++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index f87531318feb8..a78a067e3ce7f 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev)); for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID || + i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP)) + if (!MLX5_CAP_GEN(mdev, roce) || + !MLX5_CAP_ROCE(mdev, roce_cc_general)) + continue; + dbg_cc_params->params[i].offset = i; dbg_cc_params->params[i].dev = dev; dbg_cc_params->params[i].port_num = port_num; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8bf..2c10350bd4223 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; -- GitLab From be551ee1574280ef8afbf7c271212ac3e38933ef Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Jan 2024 11:29:13 +0200 Subject: [PATCH 0379/1405] RDMA/mlx5: Relax DEVX access upon modify commands Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ. The kernel doesn't need to protect what firmware protects, or what causes no damage to anyone but the user. As firmware needs to protect itself from parallel access to the same object, don't block parallel modify/query commands on the same object in the kernel side. This change will allow user space application to run parallel updates to different entries in the same bulk object. Tested-by: Tamar Mashiah Signed-off-by: Yishai Hadas Reviewed-by: Michael Guralnik Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 869369cb5b5fa..253fea374a72d 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2949,7 +2949,7 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, UVERBS_IDR_ANY_OBJECT, - UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, -- GitLab From b4a1f4eaf1d798066affc6ad040f76eb1a16e1c9 Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Wed, 31 Jan 2024 17:12:24 +0800 Subject: [PATCH 0380/1405] USB: serial: option: add Fibocom FM101-GL variant Update the USB serial option driver support for the Fibocom FM101-GL LTE modules as there are actually several different variants. - VID:PID 2cb7:01a3, FM101-GL are laptop M.2 cards (with MBIM interfaces for /Linux/Chrome OS) 0x01a3:mbim,gnss Here are the outputs of usb-devices: T: Bus=04 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=2cb7 ProdID=01a3 Rev=05.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=5ccd5cd4 C: #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 72390dbf07692..2ae124c49d448 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2269,6 +2269,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ -- GitLab From 7cdd2108903a4e369eb37579830afc12a6877ec2 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:57 +0100 Subject: [PATCH 0381/1405] HID: bpf: remove double fdget() When the kfunc hid_bpf_attach_prog() is called, we called twice fdget(): one for fetching the type of the bpf program, and one for actually attaching the program to the device. The problem is that between those two calls, we have no guarantees that the prog_fd is still the same file descriptor for the given program. Solve this by calling bpf_prog_get() earlier, and use this to fetch the program type. Reported-by: Dan Carpenter Link: https://lore.kernel.org/bpf/CAO-hwJJ8vh8JD3-P43L-_CLNmPx0hWj44aom0O838vfP4=_1CA@mail.gmail.com/T/#t Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-1-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 66 +++++++++++++++++++---------- drivers/hid/bpf/hid_bpf_dispatch.h | 4 +- drivers/hid/bpf/hid_bpf_jmp_table.c | 20 ++------- 3 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab1..5111d1fef0d34 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -241,6 +241,39 @@ int hid_bpf_reconnect(struct hid_device *hdev) return 0; } +static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog, + __u32 flags) +{ + int fd, err, prog_type; + + prog_type = hid_bpf_get_prog_attach_type(prog); + if (prog_type < 0) + return prog_type; + + if (prog_type >= HID_BPF_PROG_TYPE_MAX) + return -EINVAL; + + if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { + err = hid_bpf_allocate_event_data(hdev); + if (err) + return err; + } + + fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags); + if (fd < 0) + return fd; + + if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { + err = hid_bpf_reconnect(hdev); + if (err) { + close_fd(fd); + return err; + } + } + + return fd; +} + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -257,18 +290,13 @@ noinline int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; + struct bpf_prog *prog; struct device *dev; - int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd); + int fd; if (!hid_bpf_ops) return -EINVAL; - if (prog_type < 0) - return prog_type; - - if (prog_type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - if ((flags & ~HID_BPF_FLAG_MASK)) return -EINVAL; @@ -278,23 +306,17 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) hdev = to_hid_device(dev); - if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { - err = hid_bpf_allocate_event_data(hdev); - if (err) - return err; - } + /* + * take a ref on the prog itself, it will be released + * on errors or when it'll be detached + */ + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); - fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags); + fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); if (fd < 0) - return fd; - - if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { - err = hid_bpf_reconnect(hdev); - if (err) { - close_fd(fd); - return err; - } - } + bpf_prog_put(prog); return fd; } diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index 63dfc8605cd21..fbe0639d09f26 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern { int hid_bpf_preload_skel(void); void hid_bpf_free_links_and_skel(void); -int hid_bpf_get_prog_attach_type(int prog_fd); +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, - __u32 flags); + struct bpf_prog *prog, __u32 flags); void __hid_bpf_destroy_device(struct hid_device *hdev); int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, struct hid_bpf_ctx_kern *ctx_kern); diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index eca34b7372f95..12f7cebddd730 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -333,15 +333,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog) return err; } -int hid_bpf_get_prog_attach_type(int prog_fd) +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog) { - struct bpf_prog *prog = NULL; - int i; int prog_type = HID_BPF_PROG_TYPE_UNDEF; - - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + int i; for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) { if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) { @@ -350,8 +345,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd) } } - bpf_prog_put(prog); - return prog_type; } @@ -388,19 +381,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = { /* called from syscall */ noinline int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, - int prog_fd, __u32 flags) + int prog_fd, struct bpf_prog *prog, __u32 flags) { struct bpf_link_primer link_primer; struct hid_bpf_link *link; - struct bpf_prog *prog = NULL; struct hid_bpf_prog_entry *prog_entry; int cnt, err = -EINVAL, prog_table_idx = -1; - /* take a ref on the prog itself */ - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - mutex_lock(&hid_bpf_attach_lock); link = kzalloc(sizeof(*link), GFP_USER); @@ -467,7 +454,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, err_unlock: mutex_unlock(&hid_bpf_attach_lock); - bpf_prog_put(prog); kfree(link); return err; -- GitLab From 89be8aa5b0ecb3b729c7bcff64bb2af7921fec63 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:58 +0100 Subject: [PATCH 0382/1405] HID: bpf: actually free hdev memory after attaching a HID-BPF program Turns out that I got my reference counts wrong and each successful bus_find_device() actually calls get_device(), and we need to manually call put_device(). Ensure each bus_find_device() gets a matching put_device() when releasing the bpf programs and fix all the error paths. Cc: Fixes: f5c27da4e3c8 ("HID: initial BPF implementation") Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-2-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 29 +++++++++++++++++++++++------ drivers/hid/bpf/hid_bpf_jmp_table.c | 20 +++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 5111d1fef0d34..7903c8638e817 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -292,7 +292,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) struct hid_device *hdev; struct bpf_prog *prog; struct device *dev; - int fd; + int err, fd; if (!hid_bpf_ops) return -EINVAL; @@ -311,14 +311,24 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * on errors or when it'll be detached */ prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_dev_put; + } fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); - if (fd < 0) - bpf_prog_put(prog); + if (fd < 0) { + err = fd; + goto out_prog_put; + } return fd; + + out_prog_put: + bpf_prog_put(prog); + out_dev_put: + put_device(dev); + return err; } /** @@ -345,8 +355,10 @@ hid_bpf_allocate_context(unsigned int hid_id) hdev = to_hid_device(dev); ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL); - if (!ctx_kern) + if (!ctx_kern) { + put_device(dev); return NULL; + } ctx_kern->ctx.hid = hdev; @@ -363,10 +375,15 @@ noinline void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; + struct hid_device *hid; ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx); + hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */ kfree(ctx_kern); + + /* get_device() is called by bus_find_device() */ + put_device(&hid->dev); } /** diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index 12f7cebddd730..aa8e1c79cdf55 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx) static void hid_bpf_release_progs(struct work_struct *work) { int i, j, n, map_fd = -1; + bool hdev_destroyed; if (!jmp_table.map) return; @@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work) if (entry->hdev) { hdev = entry->hdev; type = entry->type; + /* + * hdev is still valid, even if we are called after hid_destroy_device(): + * when hid_bpf_attach() gets called, it takes a ref on the dev through + * bus_find_device() + */ + hdev_destroyed = hdev->bpf.destroyed; hid_bpf_populate_hdev(hdev, type); @@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work) if (test_bit(next->idx, jmp_table.enabled)) continue; - if (next->hdev == hdev && next->type == type) + if (next->hdev == hdev && next->type == type) { + /* + * clear the hdev reference and decrement the device ref + * that was taken during bus_find_device() while calling + * hid_bpf_attach() + */ next->hdev = NULL; + put_device(&hdev->dev); + } } - /* if type was rdesc fixup, reconnect device */ - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP) + /* if type was rdesc fixup and the device is not gone, reconnect device */ + if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed) hid_bpf_reconnect(hdev); } } -- GitLab From 764ad6b02777d77dca3659ca490f0898aa593670 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 24 Jan 2024 12:26:59 +0100 Subject: [PATCH 0383/1405] HID: bpf: use __bpf_kfunc instead of noinline Follow the docs at Documentation/bpf/kfuncs.rst: - declare the function with `__bpf_kfunc` - disables missing prototype warnings, which allows to remove them from include/linux/hid-bpf.h Removing the prototypes is not an issue because we currently have to redeclare them when writing the BPF program. They will eventually be generated by bpftool directly AFAIU. Link: https://lore.kernel.org/r/20240124-b4-hid-bpf-fixes-v2-3-052520b1e5e6@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 18 +++++++++++++----- include/linux/hid_bpf.h | 11 ----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 7903c8638e817..470ae2c29c94f 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s } EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx * @@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); * * @returns %NULL on error, an %__u8 memory pointer on success */ -noinline __u8 * +__bpf_kfunc __u8 * hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size) { struct hid_bpf_ctx_kern *ctx_kern; @@ -167,6 +170,7 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr return ctx_kern->data + offset; } +__bpf_kfunc_end_defs(); /* * The following set contains all functions we agree BPF programs @@ -274,6 +278,9 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b return fd; } +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -286,7 +293,7 @@ static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct b * is pinned to the BPF file system). */ /* called from syscall */ -noinline int +__bpf_kfunc int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; @@ -338,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * * @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error. */ -noinline struct hid_bpf_ctx * +__bpf_kfunc struct hid_bpf_ctx * hid_bpf_allocate_context(unsigned int hid_id) { struct hid_device *hdev; @@ -371,7 +378,7 @@ hid_bpf_allocate_context(unsigned int hid_id) * @ctx: the HID-BPF context to release * */ -noinline void +__bpf_kfunc void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; @@ -397,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx) * * @returns %0 on success, a negative error code otherwise. */ -noinline int +__bpf_kfunc int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, enum hid_report_type rtype, enum hid_class_request reqtype) { @@ -465,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, kfree(dma_data); return ret; } +__bpf_kfunc_end_defs(); /* our HID-BPF entrypoints */ BTF_SET8_START(hid_bpf_fmodret_ids) diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 840cd254172d0..7118ac28d4687 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ -- GitLab From 51af8f255bdaca6d501afc0d085b808f67b44d91 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Tue, 30 Jan 2024 15:21:51 +0200 Subject: [PATCH 0384/1405] ahci: Extend ASM1061 43-bit DMA address quirk to other ASM106x parts ASMedia have confirmed that all ASM106x parts currently listed in ahci_pci_tbl[] suffer from the 43-bit DMA address limitation that we ran into on the ASM1061, and therefore, we need to apply the quirk added by commit 20730e9b2778 ("ahci: add 43-bit DMA address quirk for ASMedia ASM1061 controllers") to the other supported ASM106x parts as well. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-ide/ZbopwKZJAKQRA4Xv@x1-carbon/ Signed-off-by: Lennert Buytenhek [cassel: add link to ASMedia confirmation email] Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index d2460fa985b7e..da2e74fce2d99 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -606,13 +606,13 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* ASMedia */ - { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ - { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci_43bit_dma }, /* ASM1060 */ + { PCI_VDEVICE(ASMEDIA, 0x0602), board_ahci_43bit_dma }, /* ASM1060 */ { PCI_VDEVICE(ASMEDIA, 0x0611), board_ahci_43bit_dma }, /* ASM1061 */ { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci_43bit_dma }, /* ASM1061/1062 */ - { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci }, /* ASM1061R */ - { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci }, /* ASM1062R */ - { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci }, /* ASM1062+JMB575 */ + { PCI_VDEVICE(ASMEDIA, 0x0621), board_ahci_43bit_dma }, /* ASM1061R */ + { PCI_VDEVICE(ASMEDIA, 0x0622), board_ahci_43bit_dma }, /* ASM1062R */ + { PCI_VDEVICE(ASMEDIA, 0x0624), board_ahci_43bit_dma }, /* ASM1062+JMB575 */ { PCI_VDEVICE(ASMEDIA, 0x1062), board_ahci }, /* ASM1062A */ { PCI_VDEVICE(ASMEDIA, 0x1064), board_ahci }, /* ASM1064 */ { PCI_VDEVICE(ASMEDIA, 0x1164), board_ahci }, /* ASM1164 */ -- GitLab From 913b9d443a0180cf0de3548f1ab3149378998486 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 31 Jan 2024 13:37:25 +0100 Subject: [PATCH 0385/1405] parisc: BTLB: Fix crash when setting up BTLB at CPU bringup When using hotplug and bringing up a 32-bit CPU, ask the firmware about the BTLB information to set up the static (block) TLB entries. For that write access to the static btlb_info struct is needed, but since it is marked __ro_after_init the kernel segfaults with missing write permissions. Fix the crash by dropping the __ro_after_init annotation. Fixes: e5ef93d02d6c ("parisc: BTLB: Initialize BTLB tables at CPU startup") Signed-off-by: Helge Deller Cc: # v6.6+ --- arch/parisc/kernel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 5552602fcaef1..422f3e1e6d9ca 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init; struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -struct pdc_btlb_info btlb_info __ro_after_init; +struct pdc_btlb_info btlb_info; #endif DEFINE_STATIC_KEY_TRUE(parisc_has_cache); -- GitLab From 2468e8922d2f6da81a6192b73023eff67e3fefdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Relvas?= Date: Wed, 31 Jan 2024 11:34:09 +0000 Subject: [PATCH 0386/1405] ALSA: hda/realtek: Apply headset jack quirk for non-bass alc287 thinkpads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There currently exists two thinkpad headset jack fixups: ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK ALC285_FIXUP_THINKPAD_HEADSET_JACK The latter is applied to alc285 and alc287 thinkpads which contain bass speakers. However, the former was only being applied to alc285 thinkpads, leaving non-bass alc287 thinkpads with no headset button controls. This patch fixes that by adding ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK to the alc287 chains, allowing the detection of headset buttons. Signed-off-by: José Relvas Cc: Link: https://lore.kernel.org/r/20240131113407.34698-3-josemonsantorelvas@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bfa244028b994..f98520c0bed9f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9579,7 +9579,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cs35l41_fixup_i2c_two, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_TAS2781_I2C] = { .type = HDA_FIXUP_FUNC, @@ -9606,6 +9606,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC287_FIXUP_THINKPAD_I2S_SPK] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_bind_dacs, + .chained = true, + .chain_id = ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, }, [ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD] = { .type = HDA_FIXUP_FUNC, -- GitLab From 2d3b3ab8d0d54e6c20260c8c1a73901a7a58a9cd Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 19 Jan 2024 17:06:21 +0530 Subject: [PATCH 0387/1405] dt-bindings: xilinx: replace Piyush Mehta maintainership As Piyush is leaving AMD, he handed over ahci-ceva, ZynqMP Mode Pin GPIO controller, Zynq UltraScale+ MPSoC and Versal reset, Xilinx SuperSpeed DWC3 USB SoC controller, Microchip USB5744 4-port Hub Controller and Xilinx udc controller maintainership duties to Mubin and Radhey. Signed-off-by: Radhey Shyam Pandey Acked-by: Mubin Sayyed Acked-by: Michal Simek Acked-by: Krzysztof Kozlowski Acked-by: Piyush Mehta Acked-by: Bartosz Golaszewski Acked-by: Niklas Cassel Link: https://lore.kernel.org/r/1705664181-722937-1-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml | 3 ++- .../devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml | 3 ++- Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml | 3 ++- Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml | 3 ++- Documentation/devicetree/bindings/usb/microchip,usb5744.yaml | 3 ++- Documentation/devicetree/bindings/usb/xlnx,usb2.yaml | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml index b29ce598f9aae..9952e0ef77674 100644 --- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml +++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml @@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Ceva AHCI SATA Controller maintainers: - - Piyush Mehta + - Mubin Sayyed + - Radhey Shyam Pandey description: | The Ceva SATA controller mostly conforms to the AHCI interface with some diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml index 56143f1fe84ac..501771c672a62 100644 --- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml +++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml @@ -12,7 +12,8 @@ description: PS_MODE). Every pin can be configured as input/output. maintainers: - - Piyush Mehta + - Mubin Sayyed + - Radhey Shyam Pandey properties: compatible: diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml index 49db668014297..1f1b42dde94d5 100644 --- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml +++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml @@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Zynq UltraScale+ MPSoC and Versal reset maintainers: - - Piyush Mehta + - Mubin Sayyed + - Radhey Shyam Pandey description: | The Zynq UltraScale+ MPSoC and Versal has several different resets. diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml index bb373eb025a5f..00f87a558c7dd 100644 --- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml +++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml @@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx SuperSpeed DWC3 USB SoC controller maintainers: - - Piyush Mehta + - Mubin Sayyed + - Radhey Shyam Pandey properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml index ff3a1707ef570..dfaf3ddde1051 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml @@ -16,8 +16,9 @@ description: USB 2.0 traffic. maintainers: - - Piyush Mehta - Michal Simek + - Mubin Sayyed + - Radhey Shyam Pandey properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml index 868dffe314bcb..a7f75fe366652 100644 --- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml +++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml @@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Xilinx udc controller maintainers: - - Piyush Mehta + - Mubin Sayyed + - Radhey Shyam Pandey properties: compatible: -- GitLab From 4c654ec0ba0f9a04905ebad538689524a8779154 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 22 Jan 2024 14:49:58 -0600 Subject: [PATCH 0388/1405] dt-bindings: display: nxp,tda998x: Fix 'audio-ports' constraints The constraints for 'audio-ports' don't match the description. There can be 1 or 2 DAI entries and each entry is exactly 2 values. Also, the values' sizes are 32-bits, not 8-bits. Move the size constraints to the outer dimension (number of DAIs) and add constraints on inner array values. Link: https://lore.kernel.org/r/20240122204959.1665970-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/bridge/nxp,tda998x.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml index 21d995f29a1e3..b8e9cf6ce4e61 100644 --- a/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml +++ b/Documentation/devicetree/bindings/display/bridge/nxp,tda998x.yaml @@ -29,19 +29,22 @@ properties: audio-ports: description: - Array of 8-bit values, 2 values per DAI (Documentation/sound/soc/dai.rst). + Array of 2 values per DAI (Documentation/sound/soc/dai.rst). The implementation allows one or two DAIs. If two DAIs are defined, they must be of different type. $ref: /schemas/types.yaml#/definitions/uint32-matrix + minItems: 1 + maxItems: 2 items: - minItems: 1 items: - description: | The first value defines the DAI type: TDA998x_SPDIF or TDA998x_I2S (see include/dt-bindings/display/tda998x.h). + enum: [ 1, 2 ] - description: The second value defines the tda998x AP_ENA reg content when the DAI in question is used. + maximum: 0xff '#sound-dai-cells': enum: [ 0, 1 ] -- GitLab From adf0c363ad2c7ceccb2831a295cd8fc50378269e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 24 Jan 2024 13:07:14 -0600 Subject: [PATCH 0389/1405] dt-bindings: tpm: Drop type from "resets" "resets" is a standard property which already has a type. Users only need to define how many clocks and what each clock is if more than 1 clock. Reviewed-by: Lukas Wunner Link: https://lore.kernel.org/r/20240124190714.1553772-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/tpm/tpm-common.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/tpm/tpm-common.yaml b/Documentation/devicetree/bindings/tpm/tpm-common.yaml index 90390624a8be5..3c1241b2a43f9 100644 --- a/Documentation/devicetree/bindings/tpm/tpm-common.yaml +++ b/Documentation/devicetree/bindings/tpm/tpm-common.yaml @@ -42,7 +42,7 @@ properties: resets: description: Reset controller to reset the TPM - $ref: /schemas/types.yaml#/definitions/phandle + maxItems: 1 reset-gpios: description: Output GPIO pin to reset the TPM -- GitLab From 8f7e917907385e112a845d668ae2832f41e64bf5 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 23 Jan 2024 16:14:22 +0100 Subject: [PATCH 0390/1405] of: property: fix typo in io-channels The property is io-channels and not io-channel. This was effectively preventing the devlink creation. Fixes: 8e12257dead7 ("of: property: Add device link support for iommus, mboxes and io-channels") Cc: stable@vger.kernel.org Signed-off-by: Nuno Sa Reviewed-by: Saravana Kannan Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240123-iio-backend-v7-1-1bff236b8693@analog.com Signed-off-by: Rob Herring --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 641a40cf5cf34..bbf0dee2fb9ca 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1244,7 +1244,7 @@ DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") -DEFINE_SIMPLE_PROP(io_channels, "io-channel", "#io-channel-cells") +DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(interrupt_parent, "interrupt-parent", NULL) DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") -- GitLab From 89876175c8c83c35cf0cc8e21b7460dfed7b118a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 20 Jan 2024 17:32:55 +0900 Subject: [PATCH 0391/1405] kbuild: fix W= flags in the help message W=c and W=e are supported. Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9869f57c3fb3e..02e3500c2c178 100644 --- a/Makefile +++ b/Makefile @@ -1662,7 +1662,7 @@ help: @echo ' (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' - @echo ' make W=n [targets] Enable extra build checks, n=1,2,3 where' + @echo ' make W=n [targets] Enable extra build checks, n=1,2,3,c,e where' @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' @echo ' 3: more obscure warnings, can most likely be ignored' -- GitLab From cda5f94e88b45c9209599bac15fc44add5a59f60 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 27 Jan 2024 22:28:11 +0900 Subject: [PATCH 0392/1405] modpost: avoid using the alias attribute Aiden Leong reported modpost fails to build on macOS since commit 16a473f60edc ("modpost: inform compilers that fatal() never returns"): scripts/mod/modpost.c:93:21: error: aliases are not supported on darwin Nathan's research indicates that Darwin seems to support weak aliases at least [1]. Although the situation might be improved in future Clang versions, we can achieve a similar outcome without relying on it. This commit makes fatal() a macro of error() + exit(1) in modpost.h, as compilers recognize that exit() never returns. [1]: https://github.com/llvm/llvm-project/issues/71001 Fixes: 16a473f60edc ("modpost: inform compilers that fatal() never returns") Reported-by: Aiden Leong Closes: https://lore.kernel.org/all/d9ac2960-6644-4a87-b5e4-4bfb6e0364a8@aibsd.com/ Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 12 +----------- scripts/mod/modpost.h | 6 +----- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index acf72112acd8b..267b9a0a3abcd 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) break; case LOG_ERROR: fprintf(stderr, "ERROR: "); - break; - case LOG_FATAL: - fprintf(stderr, "FATAL: "); + error_occurred = true; break; default: /* invalid loglevel, ignore */ break; @@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) va_start(arglist, fmt); vfprintf(stderr, fmt, arglist); va_end(arglist); - - if (loglevel == LOG_FATAL) - exit(1); - if (loglevel == LOG_ERROR) - error_occurred = true; } -void __attribute__((alias("modpost_log"))) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - static inline bool strends(const char *str, const char *postfix) { if (strlen(str) < strlen(postfix)) diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 835cababf1b09..ee43c79506368 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym); enum loglevel { LOG_WARN, LOG_ERROR, - LOG_FATAL }; void __attribute__((format(printf, 2, 3))) modpost_log(enum loglevel loglevel, const char *fmt, ...); -void __attribute__((format(printf, 2, 3), noreturn)) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - /* * warn - show the given message, then let modpost continue running, still * allowing modpost to exit successfully. This should be used when @@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) -#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args) +#define fatal(fmt, args...) do { error(fmt, ##args); exit(1); } while (1) -- GitLab From 82175d1f9430d5a026e2231782d13da0bf57155c Mon Sep 17 00:00:00 2001 From: Dmitry Goncharov Date: Sun, 28 Jan 2024 10:10:39 -0500 Subject: [PATCH 0393/1405] kbuild: Replace tabs with spaces when followed by conditionals This is needed for the future (post make-4.4.1) versions of gnu make. Starting from https://git.savannah.gnu.org/cgit/make.git/commit/?id=07fcee35f058a876447c8a021f9eb1943f902534 gnu make won't allow conditionals to follow recipe prefix. For example there is a tab followed by ifeq on line 324 in the root Makefile. With the new make this conditional causes the following $ make cpu.o /home/dgoncharov/src/linux-kbuild/Makefile:2063: *** missing 'endif'. Stop. make: *** [Makefile:240: __sub-make] Error 2 This patch replaces tabs followed by conditionals with 8 spaces. See https://savannah.gnu.org/bugs/?64185 and https://savannah.gnu.org/bugs/?64259 for details. Signed-off-by: Dmitry Goncharov Reported-by: Martin Dorey Reviewed-by: Miguel Ojeda Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ arch/m68k/Makefile | 4 ++-- arch/parisc/Makefile | 4 ++-- arch/x86/Makefile | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 02e3500c2c178..087c9c64f611a 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 43e39040d3ac6..76ef1a67c3611 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 920db57b6b4cc..7486b3b305949 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a564..2264db14a25d3 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + endif + endif else BITS := 64 UTS_MACHINE := x86_64 -- GitLab From 358de8b4f201bc05712484b15f0109b1ae3516a8 Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Mon, 29 Jan 2024 10:28:19 +0100 Subject: [PATCH 0394/1405] kbuild: rpm-pkg: simplify installkernel %post The new installkernel application that is now included in systemd-udev package allows installation although destination files are already present in the boot directory of the kernel package, but is failing with the implemented workaround for the old installkernel application from grubby package. For the new installkernel application, as Davide says: <> But we need to keep the old behavior as well, because the old installkernel application from grubby package, does not allow this simplification and we need to be backward compatible to avoid issues with the different packages. Mimic Fedora shipping process and store vmlinuz, config amd System.map in the module directory instead of the boot directory. In this way, we will avoid the commented problem for all the cases, because the new destination files are not going to exist in the boot directory of the kernel package. Replace installkernel tool with kernel-install tool, because the latter is more complete. Besides, after installkernel tool execution, check to complete if the correct package files vmlinuz, System.map and config files are present in /boot directory, and if necessary, copy manually for install operation. In this way, take into account if files were not previously copied from /usr/lib/kernel/install.d/* scripts and if the suitable files for the requested package are present (it could be others if the rpm files were replace with a new pacakge with the same release and a different build). Tested with Fedora 38, Fedora 39, RHEL 9, Oracle Linux 9.3, openSUSE Tumbleweed and openMandrive ROME, using dnf/zypper and rpm tools. cc: stable@vger.kernel.org Co-Developed-by: Davide Cavalca Signed-off-by: Jose Ignacio Tornos Martinez Signed-off-by: Masahiro Yamada --- scripts/package/kernel.spec | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 89298983a1694..f58726671fb37 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2} %{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release} %install -mkdir -p %{buildroot}/boot -cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE} +mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} +cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install -cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} -cp .config %{buildroot}/boot/config-%{KERNELRELEASE} +cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} +cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build %if %{with_devel} %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}' @@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA rm -rf %{buildroot} %post -if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then -cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm -cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE} -/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm +if [ -x /usr/bin/kernel-install ]; then + /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz fi +for file in vmlinuz System.map config; do + if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then + cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}" + fi +done %preun if [ -x /sbin/new-kernel-pkg ]; then @@ -94,7 +95,6 @@ fi %defattr (-, root, root) /lib/modules/%{KERNELRELEASE} %exclude /lib/modules/%{KERNELRELEASE}/build -/boot/* %files headers %defattr (-, root, root) -- GitLab From 1c1914d6e8c6edbf5b45047419ff51abdb1dce96 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 17 Jan 2024 18:11:40 +0000 Subject: [PATCH 0395/1405] dma-buf: heaps: Don't track CMA dma-buf pages under RssFile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DMA buffers allocated from the CMA dma-buf heap get counted under RssFile for processes that map them and trigger page faults. In addition to the incorrect accounting reported to userspace, reclaim behavior was influenced by the MM_FILEPAGES counter until linux 6.8, but this memory is not reclaimable. [1] Change the CMA dma-buf heap to set VM_PFNMAP on the VMA so MM does not poke at the memory managed by this dma-buf heap, and use vmf_insert_pfn to correct the RSS accounting. The system dma-buf heap does not suffer from this issue since remap_pfn_range is used during the mmap of the buffer, which also sets VM_PFNMAP on the VMA. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/mm/vmscan.c?id=fb46e22a9e3863e08aef8815df9f17d0f4b9aede Fixes: b61614ec318a ("dma-buf: heaps: Add CMA heap to dmabuf heaps") Signed-off-by: T.J. Mercier Acked-by: Christian König Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240117181141.286383-1-tjmercier@google.com --- drivers/dma-buf/heaps/cma_heap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index ee899f8e67215..4a63567e93bae 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -168,10 +168,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) if (vmf->pgoff > buffer->pagecount) return VM_FAULT_SIGBUS; - vmf->page = buffer->pages[vmf->pgoff]; - get_page(vmf->page); - - return 0; + return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff])); } static const struct vm_operations_struct dma_heap_vm_ops = { @@ -185,6 +182,8 @@ static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) return -EINVAL; + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_ops = &dma_heap_vm_ops; vma->vm_private_data = buffer; -- GitLab From bfef491df67022c56aab3b831044f8d259f9441f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 26 Jan 2024 22:30:10 +0900 Subject: [PATCH 0396/1405] kconfig: initialize sym->curr.tri to 'no' for all symbol types again Geert Uytterhoeven reported that commit 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") changed the default value of CONFIG_LOG_CPU_MAX_BUF_SHIFT from 12 to 0. As it turned out, this is an undefined behavior because sym_calc_value() stopped setting the sym->curr.tri field for 'int', 'hex', and 'string' symbols. This commit restores the original behavior, where 'int', 'hex', 'string' symbols are interpreted as false if used in boolean contexts. CONFIG_LOG_CPU_MAX_BUF_SHIFT will default to 12 again, irrespective of CONFIG_BASE_SMALL. Presumably, this is not the intended behavior, as already reported [1], but this is another issue that should be addressed by a separate patch. [1]: https://lore.kernel.org/all/f6856be8-54b7-0fa0-1d17-39632bf29ada@oracle.com/ Fixes: 4e244c10eab3 ("kconfig: remove unneeded symbol_empty variable") Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/all/CAMuHMdWm6u1wX7efZQf=2XUAHascps76YQac6rdnQGhc8nop_Q@mail.gmail.com/ Tested-by: Geert Uytterhoeven Signed-off-by: Masahiro Yamada --- scripts/kconfig/symbol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 3e808528aaeab..e9e9fb8d86746 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym) oldval = sym->curr; + newval.tri = no; + switch (sym->type) { case S_INT: newval.val = "0"; @@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym) break; case S_BOOLEAN: case S_TRISTATE: - newval = symbol_no.curr; + newval.val = "n"; break; default: sym->curr.val = sym->name; -- GitLab From 1e560864159d002b453da42bd2c13a1805515a20 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 30 Jan 2024 11:02:43 +0100 Subject: [PATCH 0397/1405] PCI/ASPM: Fix deadlock when enabling ASPM A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Helgaas Cc: # 6.7 --- drivers/pci/bus.c | 49 ++++++++++------ drivers/pci/controller/dwc/pcie-qcom.c | 2 +- drivers/pci/pci.c | 78 +++++++++++++++++--------- drivers/pci/pci.h | 4 +- drivers/pci/pcie/aspm.c | 13 +++-- include/linux/pci.h | 5 ++ 6 files changed, 101 insertions(+), 50 deletions(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 9c2137dae429a..826b5016a1010 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) } EXPORT_SYMBOL(pci_bus_add_devices); -/** pci_walk_bus - walk devices on/under bus, calling callback. - * @top bus whose devices should be walked - * @cb callback to be called for each device found - * @userdata arbitrary pointer to be passed to callback. - * - * Walk the given bus, including any bridged devices - * on buses under this bus. Call the provided callback - * on each device found. - * - * We check the return of @cb each time. If it returns anything - * other than 0, we break out. - * - */ -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata) +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata, bool locked) { struct pci_dev *dev; struct pci_bus *bus; @@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), int retval; bus = top; - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); next = top->devices.next; for (;;) { if (next == &bus->devices) { @@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), if (retval) break; } - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); +} + +/** + * pci_walk_bus - walk devices on/under bus, calling callback. + * @top: bus whose devices should be walked + * @cb: callback to be called for each device found + * @userdata: arbitrary pointer to be passed to callback + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. + * + * We check the return of @cb each time. If it returns anything + * other than 0, we break out. + */ +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + __pci_walk_bus(top, cb, userdata, false); } EXPORT_SYMBOL_GPL(pci_walk_bus); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) +{ + lockdep_assert_held(&pci_bus_sem); + + __pci_walk_bus(top, cb, userdata, true); +} +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); + struct pci_bus *pci_bus_get(struct pci_bus *bus) { if (bus) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 10f2d0bb86bec..2ce2a3bd932bd 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) * Downstream devices need to be in D0 state before enabling PCI PM * substates. */ - pci_set_power_state(pdev, PCI_D0); + pci_set_power_state_locked(pdev, PCI_D0); pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d8f11a078924c..9ab9b1008d8b9 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1354,6 +1354,7 @@ int pci_power_up(struct pci_dev *dev) /** * pci_set_full_power_state - Put a PCI device into D0 and update its state * @dev: PCI device to power up + * @locked: whether pci_bus_sem is held * * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register * to confirm the state change, restore its BARs if they might be lost and @@ -1363,7 +1364,7 @@ int pci_power_up(struct pci_dev *dev) * to D0, it is more efficient to use pci_power_up() directly instead of this * function. */ -static int pci_set_full_power_state(struct pci_dev *dev) +static int pci_set_full_power_state(struct pci_dev *dev, bool locked) { u16 pmcsr; int ret; @@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev) } if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } @@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) pci_walk_bus(bus, __pci_dev_set_current_state, &state); } +static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked) +{ + if (!bus) + return; + + if (locked) + pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state); + else + pci_walk_bus(bus, __pci_dev_set_current_state, &state); +} + /** * pci_set_low_power_state - Put a PCI device into a low-power state. * @dev: PCI device to handle. * @state: PCI power state (D1, D2, D3hot) to put the device into. + * @locked: whether pci_bus_sem is held * * Use the device's PCI_PM_CTRL register to put it into a low-power state. * @@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) * 0 if device already is in the requested state. * 0 if device's power state has been successfully changed. */ -static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) +static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { u16 pmcsr; @@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) pci_power_name(state)); if (dev->bus->self) - pcie_aspm_pm_state_change(dev->bus->self); + pcie_aspm_pm_state_change(dev->bus->self, locked); return 0; } -/** - * pci_set_power_state - Set the power state of a PCI device - * @dev: PCI device to handle. - * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. - * - * Transition a device to a new power state, using the platform firmware and/or - * the device's PCI PM registers. - * - * RETURN VALUE: - * -EINVAL if the requested state is invalid. - * -EIO if device does not support PCI PM or its PM capabilities register has a - * wrong version, or device doesn't support the requested state. - * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. - * 0 if device already is in the requested state. - * 0 if the transition is to D3 but D3 is not supported. - * 0 if device's power state has been successfully changed. - */ -int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked) { int error; @@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; if (state == PCI_D0) - return pci_set_full_power_state(dev); + return pci_set_full_power_state(dev, locked); /* * This device is quirked not to be put into D3, so don't put it in @@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) * To put the device in D3cold, put it into D3hot in the native * way, then put it into D3cold using platform ops. */ - error = pci_set_low_power_state(dev, PCI_D3hot); + error = pci_set_low_power_state(dev, PCI_D3hot, locked); if (pci_platform_power_transition(dev, PCI_D3cold)) return error; /* Powering off a bridge may power off the whole hierarchy */ if (dev->current_state == PCI_D3cold) - pci_bus_set_current_state(dev->subordinate, PCI_D3cold); + __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked); } else { - error = pci_set_low_power_state(dev, state); + error = pci_set_low_power_state(dev, state, locked); if (pci_platform_power_transition(dev, state)) return error; @@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) return 0; } + +/** + * pci_set_power_state - Set the power state of a PCI device + * @dev: PCI device to handle. + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * + * Transition a device to a new power state, using the platform firmware and/or + * the device's PCI PM registers. + * + * RETURN VALUE: + * -EINVAL if the requested state is invalid. + * -EIO if device does not support PCI PM or its PM capabilities register has a + * wrong version, or device doesn't support the requested state. + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. + * 0 if device already is in the requested state. + * 0 if the transition is to D3 but D3 is not supported. + * 0 if device's power state has been successfully changed. + */ +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) +{ + return __pci_set_power_state(dev, state, false); +} EXPORT_SYMBOL(pci_set_power_state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ + lockdep_assert_held(&pci_bus_sem); + + return __pci_set_power_state(dev, state, true); +} +EXPORT_SYMBOL(pci_set_power_state_locked); + #define PCI_EXP_SAVE_REGS 7 static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 2336a8d1edab2..e9750b1b19bad 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); -void pcie_aspm_pm_state_change(struct pci_dev *pdev); +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } -static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } +static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } #endif diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 5a0066ecc3c5a..bc0bd86695ec6 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) up_read(&pci_bus_sem); } -/* @pdev: the root port or switch downstream port */ -void pcie_aspm_pm_state_change(struct pci_dev *pdev) +/* + * @pdev: the root port or switch downstream port + * @locked: whether pci_bus_sem is held + */ +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { struct pcie_link_state *link = pdev->link_state; @@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) * Devices changed PM state, we should recheck if latency * meets all functions' requirement */ - down_read(&pci_bus_sem); + if (!locked) + down_read(&pci_bus_sem); mutex_lock(&aspm_lock); pcie_update_aspm_capable(link->root); pcie_config_aspm_path(link); mutex_unlock(&aspm_lock); - up_read(&pci_bus_sem); + if (!locked) + up_read(&pci_bus_sem); } void pcie_aspm_powersave_config_link(struct pci_dev *pdev) diff --git a/include/linux/pci.h b/include/linux/pci.h index add9368e6314b..7ab0d13672daf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, struct pci_saved_state **state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); void pci_pme_active(struct pci_dev *dev, bool enable); @@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), + void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } static inline void pci_restore_state(struct pci_dev *dev) { } static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return 0; } +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) +{ return 0; } static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) { return 0; } static inline pci_power_t pci_choose_state(struct pci_dev *dev, -- GitLab From 925bd5e08106bd5bfbd1cb8e124c89a0b4003569 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 29 Jan 2024 17:59:33 +0100 Subject: [PATCH 0398/1405] MAINTAINERS: Add Manivannan Sadhasivam as PCI Endpoint maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCI endpoint subsystem is evolving at a rate I cannot keep up with, therefore I am standing down as a maintainer handing over to Manivannan (currently reviewer for this code) and Krzysztof who are doing an excellent job on the matter - they don't need my help any longer. Link: https://lore.kernel.org/r/20240129165933.33428-1-lpieralisi@kernel.org Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Manivannan Sadhasivam Cc: Krzysztof Wilczyński --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..a40cfcd1c65e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16856,9 +16856,8 @@ F: Documentation/devicetree/bindings/pci/xilinx-versal-cpm.yaml F: drivers/pci/controller/pcie-xilinx-cpm.c PCI ENDPOINT SUBSYSTEM -M: Lorenzo Pieralisi +M: Manivannan Sadhasivam M: Krzysztof Wilczyński -R: Manivannan Sadhasivam R: Kishon Vijay Abraham I L: linux-pci@vger.kernel.org S: Supported -- GitLab From 9189526c46f2ad14df25dbdc30443f79d03dc7bd Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Wed, 24 Jan 2024 23:50:31 +0100 Subject: [PATCH 0399/1405] MAINTAINERS: Update i2c host drivers repository The i2c host patches are now set to be merged into the following repository: git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git Cc: Wolfram Sang Acked-by: Wolfram Sang Signed-off-by: Andi Shyti --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8999497011a26..179929cce68c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10091,7 +10091,7 @@ L: linux-i2c@vger.kernel.org S: Maintained W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/andi.shyti/linux.git F: Documentation/devicetree/bindings/i2c/ F: drivers/i2c/algos/ F: drivers/i2c/busses/ -- GitLab From 6231c9e1a9f35b535c66709aa8a6eda40dbc4132 Mon Sep 17 00:00:00 2001 From: Prasad Pandit Date: Wed, 3 Jan 2024 13:23:43 +0530 Subject: [PATCH 0400/1405] KVM: x86: make KVM_REQ_NMI request iff NMI pending for vcpu kvm_vcpu_ioctl_x86_set_vcpu_events() routine makes 'KVM_REQ_NMI' request for a vcpu even when its 'events->nmi.pending' is zero. Ex: qemu_thread_start kvm_vcpu_thread_fn qemu_wait_io_event qemu_wait_io_event_common process_queued_cpu_work do_kvm_cpu_synchronize_post_init/_reset kvm_arch_put_registers kvm_put_vcpu_events (cpu, level=[2|3]) This leads vCPU threads in QEMU to constantly acquire & release the global mutex lock, delaying the guest boot due to lock contention. Add check to make KVM_REQ_NMI request only if vcpu has NMI pending. Fixes: bdedff263132 ("KVM: x86: Route pending NMIs from userspace through process_nmi()") Cc: stable@vger.kernel.org Signed-off-by: Prasad Pandit Link: https://lore.kernel.org/r/20240103075343.549293-1-ppandit@redhat.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c0802057..25bc52cdf8f45 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5454,7 +5454,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) { vcpu->arch.nmi_pending = 0; atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending); - kvm_make_request(KVM_REQ_NMI, vcpu); + if (events->nmi.pending) + kvm_make_request(KVM_REQ_NMI, vcpu); } static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked); -- GitLab From 5b778e1c2e9760ffe99482de26e70cd74683ba5c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:35 -0800 Subject: [PATCH 0401/1405] wifi: fill in MODULE_DESCRIPTION()s for wlcore W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the TI WLAN wlcore drivers. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-2-leitao@debian.org --- drivers/net/wireless/ti/wlcore/main.c | 1 + drivers/net/wireless/ti/wlcore/sdio.c | 1 + drivers/net/wireless/ti/wlcore/spi.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index fb9ed97774c7a..5736acb4d2063 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6793,6 +6793,7 @@ MODULE_PARM_DESC(bug_on_recovery, "BUG() on fw recovery"); module_param(no_recovery, int, 0600); MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck."); +MODULE_DESCRIPTION("TI WLAN core driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index f0686635db46e..eb5482ed76ae4 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -447,6 +447,7 @@ module_sdio_driver(wl1271_sdio_driver); module_param(dump, bool, 0600); MODULE_PARM_DESC(dump, "Enable sdio read/write dumps."); +MODULE_DESCRIPTION("TI WLAN SDIO helpers"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 7d9a139db59e1..0aa2b2f3c5c91 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -562,6 +562,7 @@ static struct spi_driver wl1271_spi_driver = { }; module_spi_driver(wl1271_spi_driver); +MODULE_DESCRIPTION("TI WLAN SPI helpers"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); -- GitLab From 2f2b503ea770cf817044851a583e8880d1de4ae2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:36 -0800 Subject: [PATCH 0402/1405] wifi: fill in MODULE_DESCRIPTION()s for wl1251 and wl12xx W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the TI wireless drivers wl12xx and wl1251. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-3-leitao@debian.org --- drivers/net/wireless/ti/wl1251/sdio.c | 1 + drivers/net/wireless/ti/wl1251/spi.c | 1 + drivers/net/wireless/ti/wl12xx/main.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/ti/wl1251/sdio.c b/drivers/net/wireless/ti/wl1251/sdio.c index 301bd0043a435..4e5b351f80f09 100644 --- a/drivers/net/wireless/ti/wl1251/sdio.c +++ b/drivers/net/wireless/ti/wl1251/sdio.c @@ -343,5 +343,6 @@ static void __exit wl1251_sdio_exit(void) module_init(wl1251_sdio_init); module_exit(wl1251_sdio_exit); +MODULE_DESCRIPTION("TI WL1251 SDIO helpers"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kalle Valo "); diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c index 29292f06bd3dc..1936bb3af54ab 100644 --- a/drivers/net/wireless/ti/wl1251/spi.c +++ b/drivers/net/wireless/ti/wl1251/spi.c @@ -342,6 +342,7 @@ static struct spi_driver wl1251_spi_driver = { module_spi_driver(wl1251_spi_driver); +MODULE_DESCRIPTION("TI WL1251 SPI helpers"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Kalle Valo "); MODULE_ALIAS("spi:wl1251"); diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c index de045fe4ca1eb..b26d42b4e3cc0 100644 --- a/drivers/net/wireless/ti/wl12xx/main.c +++ b/drivers/net/wireless/ti/wl12xx/main.c @@ -1955,6 +1955,7 @@ module_param_named(tcxo, tcxo_param, charp, 0); MODULE_PARM_DESC(tcxo, "TCXO clock: 19.2, 26, 38.4, 52, 16.368, 32.736, 16.8, 33.6"); +MODULE_DESCRIPTION("TI WL12xx wireless driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Luciano Coelho "); MODULE_FIRMWARE(WL127X_FW_NAME_SINGLE); -- GitLab From 257ca10c7317d4a424e48bb95d14ca53a1f1dd6f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:37 -0800 Subject: [PATCH 0403/1405] wifi: fill in MODULE_DESCRIPTION()s for Broadcom WLAN W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Broadcom FullMac WLAN drivers. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-4-leitao@debian.org --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c | 1 + drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c | 1 + drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c index d55f3271d6190..4f0c1e1a8e605 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/module.c @@ -20,6 +20,7 @@ static void __exit brcmf_bca_exit(void) brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_BCA, THIS_MODULE); } +MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom AP chipsets"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_IMPORT_NS(BRCMFMAC); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c index f82fbbe3ecefb..90d06cda03a2f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/module.c @@ -20,6 +20,7 @@ static void __exit brcmf_cyw_exit(void) brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_CYW, THIS_MODULE); } +MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Cypress/Infineon chipsets"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_IMPORT_NS(BRCMFMAC); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c index 02918d434556b..b66135e3cff47 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/module.c @@ -20,6 +20,7 @@ static void __exit brcmf_wcc_exit(void) brcmf_fwvid_unregister_vendor(BRCMF_FWVENDOR_WCC, THIS_MODULE); } +MODULE_DESCRIPTION("Broadcom FullMAC WLAN driver plugin for Broadcom mobility chipsets"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_IMPORT_NS(BRCMFMAC); -- GitLab From f8782ea450ad83df5f3dfdb1fca093f46238febe Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:38 -0800 Subject: [PATCH 0404/1405] wifi: fill in MODULE_DESCRIPTION()s for ar5523 W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Atheros AR5523 wireless driver. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-5-leitao@debian.org --- drivers/net/wireless/ath/ar5523/ar5523.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 43e0db78d42be..a742cec44e3db 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1803,5 +1803,6 @@ static struct usb_driver ar5523_driver = { module_usb_driver(ar5523_driver); +MODULE_DESCRIPTION("Atheros AR5523 wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_FIRMWARE(AR5523_FIRMWARE_FILE); -- GitLab From e063d2a05d713d396044124867704b08e5c30860 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:39 -0800 Subject: [PATCH 0405/1405] wifi: fill in MODULE_DESCRIPTION()s for wcn36xx W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Qualcomm Atheros WCN3660/3680 wireless driver. Signed-off-by: Breno Leitao Reviewed-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-6-leitao@debian.org --- drivers/net/wireless/ath/wcn36xx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 41119fb177e30..4e6b4df8562f6 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1685,6 +1685,7 @@ static struct platform_driver wcn36xx_driver = { module_platform_driver(wcn36xx_driver); +MODULE_DESCRIPTION("Qualcomm Atheros WCN3660/3680 wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Eugene Krasnikov k.eugene.e@gmail.com"); MODULE_FIRMWARE(WLAN_NV_FILE); -- GitLab From 714ea2f109d9d561789078fd8a1beeffa9af36d6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:40 -0800 Subject: [PATCH 0406/1405] wifi: fill in MODULE_DESCRIPTION()s for p54spi W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Prism54 SPI wireless driver. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-7-leitao@debian.org --- drivers/net/wireless/intersil/p54/p54spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c index ce0179b8ab368..0073b5e0f9c90 100644 --- a/drivers/net/wireless/intersil/p54/p54spi.c +++ b/drivers/net/wireless/intersil/p54/p54spi.c @@ -700,6 +700,7 @@ static struct spi_driver p54spi_driver = { module_spi_driver(p54spi_driver); +MODULE_DESCRIPTION("Prism54 SPI wireless driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Lamparter "); MODULE_ALIAS("spi:cx3110x"); -- GitLab From 35337ac472605da9db051827fa2d39e6c02c6d81 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:41 -0800 Subject: [PATCH 0407/1405] wifi: fill in MODULE_DESCRIPTION()s for wl18xx W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the TI WiLink 8 wireless driver. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-8-leitao@debian.org --- drivers/net/wireless/ti/wl18xx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c index 20d9181b3410c..2ccac1cdec012 100644 --- a/drivers/net/wireless/ti/wl18xx/main.c +++ b/drivers/net/wireless/ti/wl18xx/main.c @@ -2086,6 +2086,7 @@ module_param_named(num_rx_desc, num_rx_desc_param, int, 0400); MODULE_PARM_DESC(num_rx_desc_param, "Number of Rx descriptors: u8 (default is 32)"); +MODULE_DESCRIPTION("TI WiLink 8 wireless driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Luciano Coelho "); MODULE_FIRMWARE(WL18XX_FW_NAME); -- GitLab From c9013880284d78bac6498d9c0b0b7043cf0f5639 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:42 -0800 Subject: [PATCH 0408/1405] wifi: fill in MODULE_DESCRIPTION()s for wilc1000 W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Atmel WILC1000 SPI driver. Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-9-leitao@debian.org --- drivers/net/wireless/microchip/wilc1000/netdev.c | 1 + drivers/net/wireless/microchip/wilc1000/sdio.c | 1 + drivers/net/wireless/microchip/wilc1000/spi.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 91d71e0f7ef23..81e8f25863f5b 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -1018,5 +1018,6 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, return ERR_PTR(ret); } +MODULE_DESCRIPTION("Atmel WILC1000 core wireless driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(WILC1000_FW(WILC1000_API_VER)); diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c index 0d13e3e46e98e..d6d3946930905 100644 --- a/drivers/net/wireless/microchip/wilc1000/sdio.c +++ b/drivers/net/wireless/microchip/wilc1000/sdio.c @@ -984,4 +984,5 @@ static struct sdio_driver wilc_sdio_driver = { module_driver(wilc_sdio_driver, sdio_register_driver, sdio_unregister_driver); +MODULE_DESCRIPTION("Atmel WILC1000 SDIO wireless driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index 77b4cdff73c37..1d8b241ce43ca 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -273,6 +273,7 @@ static struct spi_driver wilc_spi_driver = { .remove = wilc_bus_remove, }; module_spi_driver(wilc_spi_driver); +MODULE_DESCRIPTION("Atmel WILC1000 SPI wireless driver"); MODULE_LICENSE("GPL"); static int wilc_spi_tx(struct wilc *wilc, u8 *b, u32 len) -- GitLab From f3f8f050316893fe2da523458ff7f5f6d61fb1a6 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 30 Jan 2024 02:42:43 -0800 Subject: [PATCH 0409/1405] wifi: fill in MODULE_DESCRIPTION()s for mt76 drivers W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the MediaTek mt76 drivers. Here is a sorted list of descriptions. It might make the reviewing process easier. MODULE_DESCRIPTION("MediaTek MT7603E and MT76x8 wireless driver"); MODULE_DESCRIPTION("MediaTek MT7615E and MT7663E wireless driver"); MODULE_DESCRIPTION("MediaTek MT7615E MMIO helpers"); MODULE_DESCRIPTION("MediaTek MT7663 SDIO/USB helpers"); MODULE_DESCRIPTION("MediaTek MT7663S (SDIO) wireless driver"); MODULE_DESCRIPTION("MediaTek MT7663U (USB) wireless driver"); MODULE_DESCRIPTION("MediaTek MT76x02 helpers"); MODULE_DESCRIPTION("MediaTek MT76x02 MCU helpers"); MODULE_DESCRIPTION("MediaTek MT76x0E (PCIe) wireless driver"); MODULE_DESCRIPTION("MediaTek MT76x0U (USB) wireless driver"); MODULE_DESCRIPTION("MediaTek MT76x2 EEPROM helpers"); MODULE_DESCRIPTION("MediaTek MT76x2E (PCIe) wireless driver"); MODULE_DESCRIPTION("MediaTek MT76x2U (USB) wireless driver"); MODULE_DESCRIPTION("MediaTek MT76x connac layer helpers"); MODULE_DESCRIPTION("MediaTek MT76x EEPROM helpers"); MODULE_DESCRIPTION("MediaTek MT76x helpers"); MODULE_DESCRIPTION("MediaTek MT76x SDIO helpers"); MODULE_DESCRIPTION("MediaTek MT76x USB helpers"); MODULE_DESCRIPTION("MediaTek MT7915E MMIO helpers"); MODULE_DESCRIPTION("MediaTek MT7921 core driver"); MODULE_DESCRIPTION("MediaTek MT7921E (PCIe) wireless driver"); MODULE_DESCRIPTION("MediaTek MT7921S (SDIO) wireless driver"); MODULE_DESCRIPTION("MediaTek MT7921U (USB) wireless driver"); MODULE_DESCRIPTION("MediaTek MT7925 core driver"); MODULE_DESCRIPTION("MediaTek MT7925E (PCIe) wireless driver"); MODULE_DESCRIPTION("MediaTek MT7925U (USB) wireless driver"); MODULE_DESCRIPTION("MediaTek MT792x core driver"); MODULE_DESCRIPTION("MediaTek MT792x USB helpers"); MODULE_DESCRIPTION("MediaTek MT7996 MMIO helpers"); Signed-off-by: Breno Leitao Signed-off-by: Kalle Valo Link: https://msgid.link/20240130104243.3025393-10-leitao@debian.org --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 1 + drivers/net/wireless/mediatek/mt76/mt7615/main.c | 1 + drivers/net/wireless/mediatek/mt76/mt7615/mmio.c | 1 + drivers/net/wireless/mediatek/mt76/mt7615/sdio.c | 1 + drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 1 + drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 1 + drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt7915/mmio.c | 1 + drivers/net/wireless/mediatek/mt76/mt7921/main.c | 1 + drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 1 + drivers/net/wireless/mediatek/mt76/mt7921/sdio.c | 1 + drivers/net/wireless/mediatek/mt76/mt7921/usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt7925/main.c | 1 + drivers/net/wireless/mediatek/mt76/mt7925/pci.c | 1 + drivers/net/wireless/mediatek/mt76/mt7925/usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt792x_core.c | 1 + drivers/net/wireless/mediatek/mt76/mt792x_usb.c | 1 + drivers/net/wireless/mediatek/mt76/mt7996/mmio.c | 1 + drivers/net/wireless/mediatek/mt76/sdio.c | 1 + drivers/net/wireless/mediatek/mt76/usb.c | 1 + drivers/net/wireless/mediatek/mt76/util.c | 1 + 29 files changed, 29 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 89d738deea62e..e2146d30e5536 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -728,6 +728,7 @@ const struct ieee80211_ops mt7603_ops = { .set_sar_specs = mt7603_set_sar_specs, }; +MODULE_DESCRIPTION("MediaTek MT7603E and MT76x8 wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); static int __init mt7603_init(void) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index dab16b5fc3861..0971c164b57e9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -1375,4 +1375,5 @@ const struct ieee80211_ops mt7615_ops = { }; EXPORT_SYMBOL_GPL(mt7615_ops); +MODULE_DESCRIPTION("MediaTek MT7615E and MT7663E wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c index ac036a072439d..87a956ea3ad74 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c @@ -270,4 +270,5 @@ static void __exit mt7615_exit(void) module_init(mt7615_init); module_exit(mt7615_exit); +MODULE_DESCRIPTION("MediaTek MT7615E MMIO helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c index 67cedd2555f97..9692890ba51b7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio.c @@ -253,4 +253,5 @@ module_sdio_driver(mt7663s_driver); MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT7663S (SDIO) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index 04963b9f74983..df737e1ff27b7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -281,4 +281,5 @@ module_usb_driver(mt7663u_driver); MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT7663U (USB) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c index 0052d103e276a..820b395900275 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c @@ -349,4 +349,5 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_register_device); MODULE_AUTHOR("Lorenzo Bianconi "); MODULE_AUTHOR("Sean Wang "); +MODULE_DESCRIPTION("MediaTek MT7663 SDIO/USB helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c index 96494ba2fdf76..3a20ba0d24928 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c @@ -3160,4 +3160,5 @@ int mt76_connac2_mcu_fill_message(struct mt76_dev *dev, struct sk_buff *skb, EXPORT_SYMBOL_GPL(mt76_connac2_mcu_fill_message); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT76x connac layer helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index c3a392a1a659e..bcd24c9072ec9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -342,4 +342,5 @@ int mt76x0_eeprom_init(struct mt76x02_dev *dev) return 0; } +MODULE_DESCRIPTION("MediaTek MT76x EEPROM helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 9277ff38b7a22..293e66fa83d5d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -302,6 +302,7 @@ static const struct pci_device_id mt76x0e_device_table[] = { MODULE_DEVICE_TABLE(pci, mt76x0e_device_table); MODULE_FIRMWARE(MT7610E_FIRMWARE); MODULE_FIRMWARE(MT7650E_FIRMWARE); +MODULE_DESCRIPTION("MediaTek MT76x0E (PCIe) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); static struct pci_driver mt76x0e_driver = { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 0422c332354a1..dd042949cf82b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -336,6 +336,7 @@ static int __maybe_unused mt76x0_resume(struct usb_interface *usb_intf) MODULE_DEVICE_TABLE(usb, mt76x0_device_table); MODULE_FIRMWARE(MT7610E_FIRMWARE); MODULE_FIRMWARE(MT7610U_FIRMWARE); +MODULE_DESCRIPTION("MediaTek MT76x0U (USB) wireless driver"); MODULE_LICENSE("GPL"); static struct usb_driver mt76x0_driver = { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c index 02da543dfc5cf..b2cc449142945 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c @@ -293,4 +293,5 @@ void mt76x02u_init_mcu(struct mt76_dev *dev) EXPORT_SYMBOL_GPL(mt76x02u_init_mcu); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT76x02 MCU helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 8a0e8124b8940..8020446be37bd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -696,4 +696,5 @@ void mt76x02_config_mac_addr_list(struct mt76x02_dev *dev) } EXPORT_SYMBOL_GPL(mt76x02_config_mac_addr_list); +MODULE_DESCRIPTION("MediaTek MT76x02 helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c index 8c01855885ce3..1fe5f5a02f937 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c @@ -506,4 +506,5 @@ int mt76x2_eeprom_init(struct mt76x02_dev *dev) } EXPORT_SYMBOL_GPL(mt76x2_eeprom_init); +MODULE_DESCRIPTION("MediaTek MT76x2 EEPROM helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c index df85ebc6e1df0..30959746e9242 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c @@ -165,6 +165,7 @@ mt76x2e_resume(struct pci_dev *pdev) MODULE_DEVICE_TABLE(pci, mt76x2e_device_table); MODULE_FIRMWARE(MT7662_FIRMWARE); MODULE_FIRMWARE(MT7662_ROM_PATCH); +MODULE_DESCRIPTION("MediaTek MT76x2E (PCIe) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); static struct pci_driver mt76pci_driver = { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index 55068f3252ef3..ca78e14251c2f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -147,4 +147,5 @@ static struct usb_driver mt76x2u_driver = { module_usb_driver(mt76x2u_driver); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT76x2U (USB) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c index aff4f21e843d2..3039f53e22454 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c @@ -958,4 +958,5 @@ static void __exit mt7915_exit(void) module_init(mt7915_init); module_exit(mt7915_exit); +MODULE_DESCRIPTION("MediaTek MT7915E MMIO helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 0645417e05825..0d5adc5ddae38 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -1418,5 +1418,6 @@ const struct ieee80211_ops mt7921_ops = { }; EXPORT_SYMBOL_GPL(mt7921_ops); +MODULE_DESCRIPTION("MediaTek MT7921 core driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Sean Wang "); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 57903c6e4f11f..dde26f3274783 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -544,4 +544,5 @@ MODULE_FIRMWARE(MT7922_FIRMWARE_WM); MODULE_FIRMWARE(MT7922_ROM_PATCH); MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT7921E (PCIe) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c index 7591e54d28973..a9ce1e746b954 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio.c @@ -323,5 +323,6 @@ static struct sdio_driver mt7921s_driver = { .drv.pm = pm_sleep_ptr(&mt7921s_pm_ops), }; module_sdio_driver(mt7921s_driver); +MODULE_DESCRIPTION("MediaTek MT7921S (SDIO) wireless driver"); MODULE_AUTHOR("Sean Wang "); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c index e5258c74fc077..8b7c03c47598d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c @@ -336,5 +336,6 @@ static struct usb_driver mt7921u_driver = { }; module_usb_driver(mt7921u_driver); +MODULE_DESCRIPTION("MediaTek MT7921U (USB) wireless driver"); MODULE_AUTHOR("Lorenzo Bianconi "); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/main.c b/drivers/net/wireless/mediatek/mt76/mt7925/main.c index 8f1075da49039..125a1be3cb64c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/main.c @@ -1450,4 +1450,5 @@ const struct ieee80211_ops mt7925_ops = { EXPORT_SYMBOL_GPL(mt7925_ops); MODULE_AUTHOR("Deren Wu "); +MODULE_DESCRIPTION("MediaTek MT7925 core driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c index 734f31ee40d3f..1fd99a8565415 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/pci.c @@ -583,4 +583,5 @@ MODULE_FIRMWARE(MT7925_FIRMWARE_WM); MODULE_FIRMWARE(MT7925_ROM_PATCH); MODULE_AUTHOR("Deren Wu "); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT7925E (PCIe) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c index 9b885c5b3ed59..1e0f094fc9059 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c @@ -329,4 +329,5 @@ static struct usb_driver mt7925u_driver = { module_usb_driver(mt7925u_driver); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT7925U (USB) wireless driver"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index 502be22dbe367..c42101aa9e45e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -862,5 +862,6 @@ int mt792x_load_firmware(struct mt792x_dev *dev) } EXPORT_SYMBOL_GPL(mt792x_load_firmware); +MODULE_DESCRIPTION("MediaTek MT792x core driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Lorenzo Bianconi "); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index 2dd283caed36b..589a3efb9f8c3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -314,5 +314,6 @@ void mt792xu_disconnect(struct usb_interface *usb_intf) } EXPORT_SYMBOL_GPL(mt792xu_disconnect); +MODULE_DESCRIPTION("MediaTek MT792x USB helpers"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Lorenzo Bianconi "); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c index c50d89a445e95..9f2abfa273c9b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mmio.c @@ -650,4 +650,5 @@ static void __exit mt7996_exit(void) module_init(mt7996_init); module_exit(mt7996_exit); +MODULE_DESCRIPTION("MediaTek MT7996 MMIO helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c index c52d550f0c32a..3e88798df0178 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/sdio.c @@ -672,4 +672,5 @@ EXPORT_SYMBOL_GPL(mt76s_init); MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT76x SDIO helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 1584665fe3cb6..5a0bcb5071bd7 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -1128,4 +1128,5 @@ int mt76u_init(struct mt76_dev *dev, struct usb_interface *intf) EXPORT_SYMBOL_GPL(mt76u_init); MODULE_AUTHOR("Lorenzo Bianconi "); +MODULE_DESCRIPTION("MediaTek MT76x USB helpers"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c index fc76c66ff1a5a..d6c01a2dd1988 100644 --- a/drivers/net/wireless/mediatek/mt76/util.c +++ b/drivers/net/wireless/mediatek/mt76/util.c @@ -138,4 +138,5 @@ int __mt76_worker_fn(void *ptr) } EXPORT_SYMBOL_GPL(__mt76_worker_fn); +MODULE_DESCRIPTION("MediaTek MT76x helpers"); MODULE_LICENSE("Dual BSD/GPL"); -- GitLab From c9ec85153fea6873c52ed4f5055c87263f1b54f9 Mon Sep 17 00:00:00 2001 From: Matthias May Date: Tue, 30 Jan 2024 10:12:18 +0000 Subject: [PATCH 0410/1405] selftests: net: add missing config for GENEVE l2_tos_ttl_inherit.sh verifies the inheritance of tos and ttl for GRETAP, VXLAN and GENEVE. Before testing it checks if the required module is available and if not skips the tests accordingly. Currently only GRETAP and VXLAN are tested because the GENEVE module is missing. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Matthias May Link: https://lore.kernel.org/r/20240130101157.196006-1-matthias.may@westermo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3d908b52f22f3..77a173635a29b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -19,6 +19,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_IP_GRE=m -- GitLab From fdd0ae72b34e56eb5e896d067c49a78ecb451032 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 31 Jan 2024 11:24:40 -0300 Subject: [PATCH 0411/1405] perf tools headers: update the asm-generic/unaligned.h copy with the kernel sources To pick up the changes in: 1ab33c03145d0f6c ("asm-generic: make sparse happy with odd-sized put_unaligned_*()") Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/asm-generic/unaligned.h include/asm-generic/unaligned.h Cc: Adrian Hunter Cc: Dmitry Torokhov Cc: Ian Rogers Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Zbp9I7rmFj1Owhug@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/unaligned.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h index 2fd551915c202..cdd2fd078027a 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/asm-generic/unaligned.h @@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p) static inline void __put_unaligned_be24(const u32 val, u8 *p) { - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be24(const u32 val, void *p) @@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p) static inline void __put_unaligned_le24(const u32 val, u8 *p) { - *p++ = val; - *p++ = val >> 8; - *p++ = val >> 16; + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; } static inline void put_unaligned_le24(const u32 val, void *p) @@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p) static inline void __put_unaligned_be48(const u64 val, u8 *p) { - *p++ = val >> 40; - *p++ = val >> 32; - *p++ = val >> 24; - *p++ = val >> 16; - *p++ = val >> 8; - *p++ = val; + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; } static inline void put_unaligned_be48(const u64 val, void *p) -- GitLab From 2b9c3eb32a699acdd4784d6b93743271b4970899 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Sat, 14 Oct 2023 12:20:15 +0200 Subject: [PATCH 0412/1405] Input: bcm5974 - check endpoint type before starting traffic syzbot has found a type mismatch between a USB pipe and the transfer endpoint, which is triggered by the bcm5974 driver[1]. This driver expects the device to provide input interrupt endpoints and if that is not the case, the driver registration should terminate. Repros are available to reproduce this issue with a certain setup for the dummy_hcd, leading to an interrupt/bulk mismatch which is caught in the USB core after calling usb_submit_urb() with the following message: "BOGUS urb xfer, pipe 1 != type 3" Some other device drivers (like the appletouch driver bcm5974 is mainly based on) provide some checking mechanism to make sure that an IN interrupt endpoint is available. In this particular case the endpoint addresses are provided by a config table, so the checking can be targeted to the provided endpoints. Add some basic checking to guarantee that the endpoints available match the expected type for both the trackpad and button endpoints. This issue was only found for the trackpad endpoint, but the checking has been added to the button endpoint as well for the same reasons. Given that there was never a check for the endpoint type, this bug has been there since the first implementation of the driver (f89bd95c5c94). [1] https://syzkaller.appspot.com/bug?extid=348331f63b034f89b622 Fixes: f89bd95c5c94 ("Input: bcm5974 - add driver for Macbook Air and Pro Penryn touchpads") Signed-off-by: Javier Carrasco Reported-and-tested-by: syzbot+348331f63b034f89b622@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20231007-topic-bcm5974_bulk-v3-1-d0f38b9d2935@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index ca150618d32f1..953992b458e9f 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -19,6 +19,7 @@ * Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch) */ +#include "linux/usb.h" #include #include #include @@ -193,6 +194,8 @@ enum tp_type { /* list of device capability bits */ #define HAS_INTEGRATED_BUTTON 1 +/* maximum number of supported endpoints (currently trackpad and button) */ +#define MAX_ENDPOINTS 2 /* trackpad finger data block size */ #define FSIZE_TYPE1 (14 * sizeof(__le16)) @@ -891,6 +894,18 @@ static int bcm5974_resume(struct usb_interface *iface) return error; } +static bool bcm5974_check_endpoints(struct usb_interface *iface, + const struct bcm5974_config *cfg) +{ + u8 ep_addr[MAX_ENDPOINTS + 1] = {0}; + + ep_addr[0] = cfg->tp_ep; + if (cfg->tp_type == TYPE1) + ep_addr[1] = cfg->bt_ep; + + return usb_check_int_endpoints(iface, ep_addr); +} + static int bcm5974_probe(struct usb_interface *iface, const struct usb_device_id *id) { @@ -903,6 +918,11 @@ static int bcm5974_probe(struct usb_interface *iface, /* find the product index */ cfg = bcm5974_get_config(udev); + if (!bcm5974_check_endpoints(iface, cfg)) { + dev_err(&iface->dev, "Unexpected non-int endpoint\n"); + return -ENODEV; + } + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(struct bcm5974), GFP_KERNEL); input_dev = input_allocate_device(); -- GitLab From 66bbea9ed6446b8471d365a22734dc00556c4785 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 31 Jan 2024 14:09:55 +0000 Subject: [PATCH 0413/1405] ring-buffer: Clean ring_buffer_poll_wait() error return The return type for ring_buffer_poll_wait() is __poll_t. This is behind the scenes an unsigned where we can set event bits. In case of a non-allocated CPU, we do return instead -EINVAL (0xffffffea). Lucky us, this ends up setting few error bits (EPOLLERR | EPOLLHUP | EPOLLNVAL), so user-space at least is aware something went wrong. Nonetheless, this is an incorrect code. Replace that -EINVAL with a proper EPOLLERR to clean that output. As this doesn't change the behaviour, there's no need to treat this change as a bug fix. Link: https://lore.kernel.org/linux-trace-kernel/20240131140955.3322792-1-vdonnefort@google.com Cc: stable@vger.kernel.org Fixes: 6721cb6002262 ("ring-buffer: Do not poll non allocated cpu buffers") Signed-off-by: Vincent Donnefort Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 13aaf5e85b811..fd4bfe3ecf014 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -944,7 +944,7 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, full = 0; } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return -EINVAL; + return EPOLLERR; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; -- GitLab From d81786f53aec14fd4d56263145a0635afbc64617 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 31 Jan 2024 13:49:19 -0500 Subject: [PATCH 0414/1405] tracefs: Zero out the tracefs_inode when allocating it eventfs uses the tracefs_inode and assumes that it's already initialized to zero. That is, it doesn't set fields to zero (like ti->private) after getting its tracefs_inode. This causes bugs due to stale values. Just initialize the entire structure to zero on allocation so there isn't any more surprises. This is a partial fix to access to ti->private. The assignment still needs to be made before the dentry is instantiated. Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.315825944@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com Suggested-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/inode.c | 6 ++++-- fs/tracefs/internal.h | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index e1b172c0e091a..888e420878477 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -38,8 +38,6 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) if (!ti) return NULL; - ti->flags = 0; - return &ti->vfs_inode; } @@ -779,7 +777,11 @@ static void init_once(void *foo) { struct tracefs_inode *ti = (struct tracefs_inode *) foo; + /* inode_init_once() calls memset() on the vfs_inode portion */ inode_init_once(&ti->vfs_inode); + + /* Zero out the rest */ + memset_after(ti, 0, vfs_inode); } static int __init tracefs_init(void) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 91c2bf0b91d9c..7d84349ade872 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -11,9 +11,10 @@ enum { }; struct tracefs_inode { + struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ unsigned long flags; void *private; - struct inode vfs_inode; }; /* -- GitLab From 4fa4b010b83fb2f837b5ef79e38072a79e96e4f1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:20 -0500 Subject: [PATCH 0415/1405] eventfs: Initialize the tracefs inode properly The tracefs-specific fields in the inode were not initialized before the inode was exposed to others through the dentry with 'd_instantiate()'. Move the field initializations up to before the d_instantiate. Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.478449628@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: 5790b1fb3d672 ("eventfs: Remove eventfs_file and just use eventfs_inode") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401291043.e62e89dc-oliver.sang@intel.com Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 1c3dd0ad4660e..824b1811e342f 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -370,6 +370,8 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; + /* Only directories have ti->private set to an ei, not files */ + ti->private = ei; inc_nlink(inode); d_instantiate(dentry, inode); @@ -515,7 +517,6 @@ create_file_dentry(struct eventfs_inode *ei, int idx, static void eventfs_post_create_dir(struct eventfs_inode *ei) { struct eventfs_inode *ei_child; - struct tracefs_inode *ti; lockdep_assert_held(&eventfs_mutex); @@ -525,9 +526,6 @@ static void eventfs_post_create_dir(struct eventfs_inode *ei) srcu_read_lock_held(&eventfs_srcu)) { ei_child->d_parent = ei->dentry; } - - ti = get_tracefs(ei->dentry->d_inode); - ti->private = ei; } /** -- GitLab From 99c001cb617df409dac275a059d6c3f187a2da7a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:21 -0500 Subject: [PATCH 0416/1405] tracefs: Avoid using the ei->dentry pointer unnecessarily The eventfs_find_events() code tries to walk up the tree to find the event directory that a dentry belongs to, in order to then find the eventfs inode that is associated with that event directory. However, it uses an odd combination of walking the dentry parent, looking up the eventfs inode associated with that, and then looking up the dentry from there. Repeat. But the code shouldn't have back-pointers to dentries in the first place, and it should just walk the dentry parenthood chain directly. Similarly, 'set_top_events_ownership()' looks up the dentry from the eventfs inode, but the only reason it wants a dentry is to look up the superblock in order to look up the root dentry. But it already has the real filesystem inode, which has that same superblock pointer. So just pass in the superblock pointer using the information that's already there, instead of looking up extraneous data that is irrelevant. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.638645365@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 824b1811e342f..e9819d719d2ac 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -156,33 +156,30 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) +static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) { - struct inode *inode; + struct inode *root; /* Only update if the "events" was on the top level */ if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; /* Get the tracefs root inode. */ - inode = d_inode(dentry->d_sb->s_root); - ei->attr.uid = inode->i_uid; - ei->attr.gid = inode->i_gid; + root = d_inode(sb->s_root); + ei->attr.uid = root->i_uid; + ei->attr.gid = root->i_gid; } static void set_top_events_ownership(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct eventfs_inode *ei = ti->private; - struct dentry *dentry; /* The top events directory doesn't get automatically updated */ if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) return; - dentry = ei->dentry; - - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, inode->i_sb); if (!(ei->attr.mode & EVENTFS_SAVE_UID)) inode->i_uid = ei->attr.uid; @@ -235,8 +232,10 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) mutex_lock(&eventfs_mutex); do { - /* The parent always has an ei, except for events itself */ - ei = dentry->d_parent->d_fsdata; + // The parent is stable because we do not do renames + dentry = dentry->d_parent; + // ... and directories always have d_fsdata + ei = dentry->d_fsdata; /* * If the ei is being freed, the ownership of the children @@ -246,12 +245,11 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) ei = NULL; break; } - - dentry = ei->dentry; + // Walk upwards until you find the events inode } while (!ei->is_events); mutex_unlock(&eventfs_mutex); - update_top_events_attr(ei, dentry); + update_top_events_attr(ei, dentry->d_sb); return ei; } -- GitLab From db2aad036e77100e04a96c67f65ae7d49fb538fb Mon Sep 17 00:00:00 2001 From: Le Ma Date: Thu, 25 Jan 2024 12:00:34 +0800 Subject: [PATCH 0417/1405] drm/amdgpu: move the drm client creation behind drm device registration This patch is to eliminate interrupt warning below: "[drm] Fence fallback timer expired on ring sdma0.0". An early vm pt clearing job is sent to SDMA ahead of interrupt enabled. And re-locating the drm client creation following after drm_dev_register looks like a more proper flow. v2: wrap the drm client creation Fixes: 1819200166ce ("drm/amdkfd: Export DMABufs from KFD using GEM handles") Signed-off-by: Le Ma Reviewed-by: Felix Kuehling Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 77e2636602887..41db030ddc4ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) static const struct drm_client_funcs kfd_client_funcs = { .unregister = drm_client_release, }; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; - int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; - ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); - if (ret) { - dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); - return; - } - /* this is going to have a few of the MSBs set that we need to * clear */ @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - if (adev->kfd.init_complete) - drm_client_register(&adev->kfd.client); - else - drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f262b9d89541a..937d0f0b21dfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index cc69005f5b46e..971acf01bea60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors -- GitLab From d9807d60c145836043ffa602328ea1d66dc458b1 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Wed, 17 Jan 2024 22:03:33 +0800 Subject: [PATCH 0418/1405] riscv: mm: execute local TLB flush after populating vmemmap The spare_init() calls memmap_populate() many times to create VA to PA mapping for the VMEMMAP area, where all "struct page" are located once CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later initialized in the zone_sizes_init() function. However, during this process, no sfence.vma instruction is executed for this VMEMMAP area. This omission may cause the hart to fail to perform page table walk because some data related to the address translation is invisible to the hart. To solve this issue, the local_flush_tlb_kernel_range() is called right after the sparse_init() to execute a sfence.vma instruction for this VMEMMAP area, ensuring that all data related to the address translation is visible to the hart. Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Signed-off-by: Vincent Chen Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240117140333.2479667-1-vincent.chen@sifive.com Fixes: 7a92fc8b4d20 ("mm: Introduce flush_cache_vmap_early()") Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlbflush.h | 1 + arch/riscv/mm/init.c | 4 ++++ arch/riscv/mm/tlbflush.c | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 928f096dca21b..4112cc8d1d69f 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -75,6 +75,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, #define flush_tlb_mm(mm) flush_tlb_all() #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() #endif /* !CONFIG_SMP || !CONFIG_MMU */ #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 32cad6a65ccd2..fa34cf55037bd 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1385,6 +1385,10 @@ void __init misc_mem_init(void) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); arch_numa_init(); sparse_init(); +#ifdef CONFIG_SPARSEMEM_VMEMMAP + /* The entire VMEMMAP region has been populated. Flush TLB for this region */ + local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END); +#endif zone_sizes_init(); arch_reserve_crashkernel(); memblock_dump_all(); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 8d12b26f5ac37..dffb0e5bd9425 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -66,9 +66,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, local_flush_tlb_range_threshold_asid(start, size, stride, asid); } +/* Flush a range of kernel pages without broadcasting */ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); } static void __ipi_flush_tlb_all(void *info) -- GitLab From 9e05d9b06757732477cca428e87f3d72163d01cf Mon Sep 17 00:00:00 2001 From: Tengfei Yu Date: Thu, 25 Jan 2024 13:08:23 +0800 Subject: [PATCH 0419/1405] KVM: x86: Check irqchip mode before create PIT As the kvm api(https://docs.kernel.org/virt/kvm/api.html) reads, KVM_CREATE_PIT2 call is only valid after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. Without this check, I can create PIT first and enable irqchip-split then, which may cause the PIT invalid because of lacking of in-kernel PIC to inject the interrupt. Signed-off-by: Tengfei Yu Message-Id: <20240125050823.4893-1-moehanabichan@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c0802057..9a89471a613cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7016,6 +7016,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) r = -EEXIST; if (kvm->arch.vpit) goto create_pit_unlock; + r = -ENOENT; + if (!pic_in_kernel(kvm)) + goto create_pit_unlock; r = -ENOMEM; kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags); if (kvm->arch.vpit) -- GitLab From d52734d00b8e86604a66b4cdfa9e8bb541daca2d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 24 Jan 2024 21:18:21 +0100 Subject: [PATCH 0420/1405] KVM: x86: Give a hint when Win2016 might fail to boot due to XSAVES erratum Since commit b0563468eeac ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17") kernel unconditionally clears the XSAVES CPU feature bit on Zen1/2 CPUs. Because KVM CPU caps are initialized from the kernel boot CPU features this makes the XSAVES feature also unavailable for KVM guests in this case. At the same time the XSAVEC feature is left enabled. Unfortunately, having XSAVEC but no XSAVES in CPUID breaks Hyper-V enabled Windows Server 2016 VMs that have more than one vCPU. Let's at least give users hint in the kernel log what could be wrong since these VMs currently simply hang at boot with a black screen - giving no clue what suddenly broke them and how to make them work again. Trigger the kernel message hint based on the particular guest ID written to the Guest OS Identity Hyper-V MSR implemented by KVM. Defer this check to when the L1 Hyper-V hypervisor enables SVM in EFER since we want to limit this message to Hyper-V enabled Windows guests only (Windows session running nested as L2) but the actual Guest OS Identity MSR write is done by L1 and happens before it enables SVM. Fixes: b0563468eeac ("x86/CPU/AMD: Disable XSAVES on AMD family 0x17") Signed-off-by: Maciej S. Szmigiero Message-Id: [Move some checks before mutex_lock(), rename function. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/hyperv.c | 50 +++++++++++++++++++++++++++++++++ arch/x86/kvm/hyperv.h | 3 ++ arch/x86/kvm/x86.c | 4 +++ 4 files changed, 59 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b5b2d0fde5796..d271ba20a0b21 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1145,6 +1145,8 @@ struct kvm_hv { unsigned int synic_auto_eoi_used; struct kvm_hv_syndbg hv_syndbg; + + bool xsaves_xsavec_checked; }; #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 4943f6b2bbee4..8a47f8541eab7 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1322,6 +1322,56 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr) return false; } +#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839 +#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */ + +/* + * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC + * configuration. + * Such configuration can result from, for example, AMD Erratum 1386 workaround. + * + * Print a notice so users aren't left wondering what's suddenly gone wrong. + */ +static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = to_kvm_hv(kvm); + + /* Check again under the hv_lock. */ + if (hv->xsaves_xsavec_checked) + return; + + if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) != + KVM_HV_WIN2016_GUEST_ID) + return; + + hv->xsaves_xsavec_checked = true; + + /* UP configurations aren't affected */ + if (atomic_read(&kvm->online_vcpus) < 2) + return; + + if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) || + !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC)) + return; + + pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. " + "If it fails to boot try disabling XSAVEC in the VM config.\n"); +} + +void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) +{ + struct kvm_hv *hv = to_kvm_hv(vcpu->kvm); + + if (!vcpu->arch.hyperv_enabled || + hv->xsaves_xsavec_checked) + return; + + mutex_lock(&hv->hv_lock); + __kvm_hv_xsaves_xsavec_maybe_warn(vcpu); + mutex_unlock(&hv->hv_lock); +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 1dc0b6604526a..923e64903da9a 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -182,6 +182,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); void kvm_hv_request_tsc_page_update(struct kvm *kvm); +void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu); + void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); @@ -267,6 +269,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); static inline void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock) {} static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {} +static inline void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) {} static inline void kvm_hv_init_vm(struct kvm *kvm) {} static inline void kvm_hv_destroy_vm(struct kvm *kvm) {} static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a89471a613cd..bf10a9073a092 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1782,6 +1782,10 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS) kvm_mmu_reset_context(vcpu); + if (!static_cpu_has(X86_FEATURE_XSAVES) && + (efer & EFER_SVME)) + kvm_hv_xsaves_xsavec_maybe_warn(vcpu); + return 0; } -- GitLab From a9ef277488cfc1b7da88235dc11c338a14f34835 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 24 Jan 2024 15:03:17 +0200 Subject: [PATCH 0421/1405] x86/kvm: Fix SEV check in sev_map_percpu_data() The function sev_map_percpu_data() checks if it is running on an SEV platform by checking the CC_ATTR_GUEST_MEM_ENCRYPT attribute. However, this attribute is also defined for TDX. To avoid false positives, add a cc_vendor check. Signed-off-by: Kirill A. Shutemov Fixes: 4d96f9109109 ("x86/sev: Replace occurrences of sev_active() with cc_platform_has()") Suggested-by: Borislav Petkov (AMD) Acked-by: David Rientjes Message-Id: <20240124130317.495519-1-kirill.shutemov@linux.intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dfe9945b9bece..428ee74002e1e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -434,7 +434,8 @@ static void __init sev_map_percpu_data(void) { int cpu; - if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + if (cc_vendor != CC_VENDOR_AMD || + !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; for_each_possible_cpu(cpu) { -- GitLab From 607aad1e4356c210dbef9022955a3089377909b2 Mon Sep 17 00:00:00 2001 From: "Christian A. Ehrhardt" Date: Mon, 29 Jan 2024 20:25:56 +0100 Subject: [PATCH 0422/1405] of: unittest: Fix compile in the non-dynamic case If CONFIG_OF_KOBJ is not set, a device_node does not contain a kobj and attempts to access the embedded kobj via kref_read break the compile. Replace affected kref_read calls with a macro that reads the refcount if it exists and returns 1 if there is no embedded kobj. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401291740.VP219WIz-lkp@intel.com/ Fixes: 4dde83569832 ("of: Fix double free in of_parse_phandle_with_args_map") Signed-off-by: Christian A. Ehrhardt Link: https://lore.kernel.org/r/20240129192556.403271-1-lk@c--e.de Signed-off-by: Rob Herring --- drivers/of/unittest.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index cfd60e35a8992..d7593bde2d02f 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -50,6 +50,12 @@ static struct unittest_results { failed; \ }) +#ifdef CONFIG_OF_KOBJ +#define OF_KREF_READ(NODE) kref_read(&(NODE)->kobj.kref) +#else +#define OF_KREF_READ(NODE) 1 +#endif + /* * Expected message may have a message level other than KERN_INFO. * Print the expected message only if the current loglevel will allow @@ -570,7 +576,7 @@ static void __init of_unittest_parse_phandle_with_args_map(void) pr_err("missing testcase data\n"); return; } - prefs[i] = kref_read(&p[i]->kobj.kref); + prefs[i] = OF_KREF_READ(p[i]); } rc = of_count_phandle_with_args(np, "phandle-list", "#phandle-cells"); @@ -693,9 +699,9 @@ static void __init of_unittest_parse_phandle_with_args_map(void) unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); for (i = 0; i < ARRAY_SIZE(p); ++i) { - unittest(prefs[i] == kref_read(&p[i]->kobj.kref), + unittest(prefs[i] == OF_KREF_READ(p[i]), "provider%d: expected:%d got:%d\n", - i, prefs[i], kref_read(&p[i]->kobj.kref)); + i, prefs[i], OF_KREF_READ(p[i])); of_node_put(p[i]); } } -- GitLab From fb366fc7541a1de521ab3df58471746aa793b833 Mon Sep 17 00:00:00 2001 From: Ryan Schaefer Date: Sun, 21 Jan 2024 21:51:44 +0000 Subject: [PATCH 0423/1405] netfilter: conntrack: correct window scaling with retransmitted SYN commit c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") introduces a bug where SYNs in ORIGINAL direction on reused 5-tuple result in incorrect window scale negotiation. This commit merged the SYN re-initialization and simultaneous open or SYN retransmits cases. Merging this block added the logic in tcp_init_sender() that performed window scale negotiation to the retransmitted syn case. Previously. this would only result in updating the sender's scale and flags. After the merge the additional logic results in improperly clearing the scale in ORIGINAL direction before any packets in the REPLY direction are received. This results in packets incorrectly being marked invalid for being out-of-window. This can be reproduced with the following trace: Packet Sequence: > Flags [S], seq 1687765604, win 62727, options [.. wscale 7], length 0 > Flags [S], seq 1944817196, win 62727, options [.. wscale 7], length 0 In order to fix the issue, only evaluate window negotiation for packets in the REPLY direction. This was tested with simultaneous open, fast open, and the above reproduction. Fixes: c7aab4f17021 ("netfilter: nf_conntrack_tcp: re-init for syn packets only") Signed-off-by: Ryan Schaefer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e573be5afde7a..ae493599a3ef0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; -- GitLab From 776d451648443f9884be4a1b4e38e8faf1c621f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 23 Jan 2024 23:45:32 +0100 Subject: [PATCH 0424/1405] netfilter: nf_tables: restrict tunnel object to NFPROTO_NETDEV Bail out on using the tunnel dst template from other than netdev family. Add the infrastructure to check for the family in objects. Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 14 +++++++++----- net/netfilter/nft_tunnel.c | 1 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f05..001226c346215 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1351,6 +1351,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1360,6 +1361,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7d..fc016befb46ff 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7551,11 +7551,15 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr, return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7563,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7660,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7674,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 9f21953c7433f..f735d79d8be57 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, -- GitLab From 97830f3c3088638ff90b20dfba2eb4d487bf14d7 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Wed, 31 Jan 2024 21:53:46 +0000 Subject: [PATCH 0425/1405] binder: signal epoll threads of self-work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In (e)poll mode, threads often depend on I/O events to determine when data is ready for consumption. Within binder, a thread may initiate a command via BINDER_WRITE_READ without a read buffer and then make use of epoll_wait() or similar to consume any responses afterwards. It is then crucial that epoll threads are signaled via wakeup when they queue their own work. Otherwise, they risk waiting indefinitely for an event leaving their work unhandled. What is worse, subsequent commands won't trigger a wakeup either as the thread has pending work. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Cc: Arve Hjønnevåg Cc: Martijn Coenen Cc: Alice Ryhl Cc: Steven Moreland Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240131215347.1808751-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8dd23b19e9973..eca24f41556df 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -478,6 +478,16 @@ binder_enqueue_thread_work_ilocked(struct binder_thread *thread, { WARN_ON(!list_empty(&thread->waiting_thread_node)); binder_enqueue_work_ilocked(work, &thread->todo); + + /* (e)poll-based threads require an explicit wakeup signal when + * queuing their own work; they rely on these events to consume + * messages without I/O block. Without it, threads risk waiting + * indefinitely without handling the work. + */ + if (thread->looper & BINDER_LOOPER_STATE_POLL && + thread->pid == current->pid && !thread->process_todo) + wake_up_interruptible_sync(&thread->wait); + thread->process_todo = true; } -- GitLab From 6e348067ee4bc5905e35faa3a8fafa91c9124bc7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 25 Jan 2024 17:29:46 -0500 Subject: [PATCH 0426/1405] netfilter: conntrack: check SCTP_CID_SHUTDOWN_ACK for vtag setting in sctp_new The annotation says in sctp_new(): "If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8)". However, it does not check SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the conntrack entry(ct). Because of that, if the ct in Router disappears for some reason in [1] with the packet sequence like below: Client > Server: sctp (1) [INIT] [init tag: 3201533963] Server > Client: sctp (1) [INIT ACK] [init tag: 972498433] Client > Server: sctp (1) [COOKIE ECHO] Server > Client: sctp (1) [COOKIE ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057809] Server > Client: sctp (1) [SACK] [cum ack 3075057809] Server > Client: sctp (1) [HB REQ] (the ct in Router disappears somehow) <-------- [1] Client > Server: sctp (1) [HB ACK] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [DATA] (B)(E) [TSN: 3075057810] Client > Server: sctp (1) [HB REQ] Client > Server: sctp (1) [ABORT] when processing HB ACK packet in Router it calls sctp_new() to initialize the new ct with vtag[REPLY] set to HB_ACK packet's vtag. Later when sending DATA from Client, all the SACKs from Server will get dropped in Router, as the SACK packet's vtag does not match vtag[REPLY] in the ct. The worst thing is the vtag in this ct will never get fixed by the upcoming packets from Server. This patch fixes it by checking SCTP_CID_SHUTDOWN_ACK before setting vtag[REPLY] in the ct in sctp_new() as the annotation says. With this fix, it will leave vtag[REPLY] in ct to 0 in the case above, and the next HB REQ/ACK from Server is able to fix the vtag as its value is 0 in nf_conntrack_sctp_packet(). Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983c1f..4cc97f971264e 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", -- GitLab From 97f7cf1cd80eeed3b7c808b7c12463295c751001 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 29 Jan 2024 10:57:01 +0100 Subject: [PATCH 0427/1405] netfilter: ipset: fix performance regression in swap operation The patch "netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test", commit 28628fa9 fixes a race condition. But the synchronize_rcu() added to the swap function unnecessarily slows it down: it can safely be moved to destroy and use call_rcu() instead. Eric Dumazet pointed out that simply calling the destroy functions as rcu callback does not work: sets with timeout use garbage collectors which need cancelling at destroy which can wait. Therefore the destroy functions are split into two: cancelling garbage collectors safely at executing the command received by netlink and moving the remaining part only into the rcu callback. Link: https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@automattic.com/ Fixes: 28628fa952fe ("netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test") Reported-by: Ale Crismani Reported-by: David Wang <00107082@163.com> Tested-by: David Wang <00107082@163.com> Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 +++ net/netfilter/ipset/ip_set_bitmap_gen.h | 14 ++++++++-- net/netfilter/ipset/ip_set_core.c | 37 +++++++++++++++++++------ net/netfilter/ipset/ip_set_hash_gen.h | 15 ++++++++-- net/netfilter/ipset/ip_set_list_set.c | 13 +++++++-- 5 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade15..e9f4f845d760a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 21f7860e8fa1f..cb48a2b9cb9fd 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -30,6 +30,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4c133e06be1de..bcaad9c009fe0 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1221,6 +1229,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1228,6 +1238,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2409,8 +2425,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cbf80da9a01ca..1136510521a80 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {}; #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {}; #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e162636525cfb..6c3f28bc59b32 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - timer_shutdown_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void -- GitLab From 259eb32971e9eb24d1777a28d82730659f50fdcb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 11:09:43 +0100 Subject: [PATCH 0428/1405] netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger Module reference is bumped for each user, this should not ever happen. But BUG_ON check should use rcu_access_pointer() instead. If this ever happens, do WARN_ON_ONCE() instead of BUG_ON() and consolidate pointer check under the rcu read side lock section. Fixes: fab4085f4e24 ("netfilter: log: nf_log_packet() as real unified interface") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8cc52d2bd31be..e16f158388bbe 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); -- GitLab From 8059918a1377f2f1fff06af4f5a4ed3d5acd6bc4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 29 Jan 2024 13:12:33 +0100 Subject: [PATCH 0429/1405] netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations - Disallow families other than NFPROTO_{IPV4,IPV6,INET}. - Disallow layer 4 protocol with no ports, since destination port is a mandatory attribute for this object. Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 86bb9d7797d9e..aac98a3c966e9 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1250,7 +1250,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num != ctx->family) + return -EINVAL; + + fallthrough; + case NFPROTO_INET: + break; + default: + return -EOPNOTSUPP; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); -- GitLab From 961df3085416ffabea192989941c89ffbf2af2d5 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 15 Jan 2024 13:37:47 -0500 Subject: [PATCH 0430/1405] drm/amdkfd: Correct partial migration virtual addr Partial migration to system memory should use migrate.addr, not prange->start as virtual address to allocate system memory page. Fixes: a546a2768440 ("drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM") Signed-off-by: Philip Yang Reviewed-by: Xiaogang Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index f856901055d34..bdc01ca9609a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -574,7 +574,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); - addr = prange->start << PAGE_SHIFT; + addr = migrate->start; src = (uint64_t *)(scratch + npages); dst = scratch; -- GitLab From c49bf4fcfc2f5516f76a706b06fcad5886cc25e1 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Fri, 5 Jan 2024 08:49:06 -0600 Subject: [PATCH 0431/1405] drm/amdkfd: Use S_ENDPGM_SAVED in trap handler This instruction has no functional difference to S_ENDPGM but allows performance counters to track save events correctly. Signed-off-by: Jay Cornwall Reviewed-by: Laurent Morichetti Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 14 +++++++------- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 2 +- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index df75863393fcb..d1caaf0e6a7c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -674,7 +674,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_nv1x_hex[] = { @@ -1091,7 +1091,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb9eef807, 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, - 0xbe80226c, 0xbf810000, + 0xbe80226c, 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -1574,7 +1574,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_aldebaran_hex[] = { @@ -2065,7 +2065,7 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; static const uint32_t cwsr_trap_gfx10_hex[] = { @@ -2500,7 +2500,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x876dff6d, 0x0000ffff, 0x87fe7e7e, 0x87ea6a6a, 0xb9faf802, 0xbe80226c, - 0xbf810000, 0xbf9f0000, + 0xbf9b0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, }; @@ -2944,7 +2944,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { 0xb8eef802, 0xbf0d866e, 0xbfa20002, 0xb97af802, 0xbe80486c, 0xb97af802, - 0xbe804a6c, 0xbfb00000, + 0xbe804a6c, 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, @@ -3436,5 +3436,5 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { 0x86ea6a6a, 0x8f6e837a, 0xb96ee0c2, 0xbf800002, 0xb97a0002, 0xbf8a0000, - 0xbe801f6c, 0xbf810000, + 0xbe801f6c, 0xbf9b0000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index e0140df0b0ec8..71b3dc0c73634 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -1104,7 +1104,7 @@ L_RETURN_WITHOUT_PRIV: s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: - s_endpgm + s_endpgm_saved end function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index e506411ad28ab..bb26338204f4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -921,7 +921,7 @@ L_RESTORE: /* the END */ /**************************************************************************/ L_END_PGM: - s_endpgm + s_endpgm_saved end -- GitLab From 4119734e06a7f30e7e8eb666692a58b85dca0269 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 26 Jan 2024 15:14:50 -0500 Subject: [PATCH 0432/1405] drm/amdkfd: Use correct drm device for cgroup permission check On GFX 9.4.3, for a given KFD node, fetch the correct drm device from XCP manager when checking for cgroup permissions. Signed-off-by: Mukul Joshi Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 17fbedbf36513..677281c0793e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1488,10 +1488,15 @@ void kfd_dec_compute_active(struct kfd_node *dev); /* Cgroup Support */ /* Check with device cgroup if @kfd device is accessible */ -static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd) +static inline int kfd_devcgroup_check_permission(struct kfd_node *node) { #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) - struct drm_device *ddev = adev_to_drm(kfd->adev); + struct drm_device *ddev; + + if (node->xcp) + ddev = node->xcp->ddev; + else + ddev = adev_to_drm(node->adev); return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR, ddev->render->index, -- GitLab From 514312c07f6cd2f1ffe5a90d42b6080868a03a26 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 9 Jan 2024 11:31:20 -0500 Subject: [PATCH 0433/1405] Revert "drm/amd/display: initialize all the dpm level's stutter latency" Revert commit 885c71ad791c ("drm/amd/display: initialize all the dpm level's stutter latency") Because it causes some regression Reviewed-by: Muhammad Ahmed Acked-by: Tom Chung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 64d01a9cd68c8..8b0f930be5ae1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -341,9 +341,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, break; } - if (dml2->config.bbox_overrides.clks_table.num_states) - p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - /* Override from passed values, if available */ for (i = 0; i < p->in_states->num_states; i++) { if (dml2->config.bbox_overrides.sr_exit_latency_us) { @@ -400,6 +397,7 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { + p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; -- GitLab From 2ff33c759a4247c84ec0b7815f1f223e155ba82a Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Tue, 16 Jan 2024 11:00:00 -0500 Subject: [PATCH 0434/1405] drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz [why] Originally, PMFW said min FCLK is 300Mhz, but min DCFCLK can be increased to 400Mhz because min FCLK is now 600Mhz so FCLK >= 1.5 * DCFCLK hardware requirement will still be satisfied. Increasing min DCFCLK addresses underflow issues (underflow occurs when phantom pipe is turned on for some Sub-Viewport configs). [how] Increasing DCFCLK by raising the min_dcfclk_mhz Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9f37f717a1f86..b13a6fd7cc832 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2753,7 +2753,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From b5abd7f983e14054593dc91d6df2aa5f8cc67652 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 16 Jan 2024 19:54:15 -0500 Subject: [PATCH 0435/1405] drm/amd/display: fix USB-C flag update after enc10 feature init [why] BIOS's integration info table not following the original order which is phy instance is ext_displaypath's array index. [how] Move them to follow the original order. Reviewed-by: Muhammad Ahmed Acked-by: Tom Chung Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c index 501388014855c..d761b0df28784 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c @@ -203,12 +203,12 @@ void dcn32_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; enc10->base.features = *enc_features; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.transmitter = init_data->transmitter; diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c index da94e5309fbaf..81e349d5835bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c @@ -184,8 +184,6 @@ void dcn35_link_encoder_construct( enc10->base.hpd_source = init_data->hpd_source; enc10->base.connector = init_data->connector; - if (enc10->base.connector.id == CONNECTOR_ID_USBC) - enc10->base.features.flags.bits.DP_IS_USB_C = 1; enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; @@ -240,6 +238,8 @@ void dcn35_link_encoder_construct( } enc10->base.features.flags.bits.HDMI_6GB_EN = 1; + if (enc10->base.connector.id == CONNECTOR_ID_USBC) + enc10->base.features.flags.bits.DP_IS_USB_C = 1; if (bp_funcs->get_connector_speed_cap_info) result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios, -- GitLab From 31c2bf25eaf51c2d45f092284a28e97f43b54c15 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 17 Jan 2024 16:46:02 -0500 Subject: [PATCH 0436/1405] drm/amd/display: Fix DPSTREAM CLK on and off sequence [Why] Secondary DP2 display fails to light up in some instances [How] Clock needs to be on when DPSTREAMCLK*_EN =1. This change moves dtbclk_p enable/disable point to make sure this is the case Reviewed-by: Charlene Liu Reviewed-by: Dmytro Laktyushkin Acked-by: Tom Chung Signed-off-by: Daniel Miess Signed-off-by: Dmytro Laktyushkin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- .../gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 5660f15da291e..2352428bcea3c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1183,9 +1183,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; if (dccg) { - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index e931342fcf4cf..4853ecac53f91 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2790,18 +2790,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { - dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); - - phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); - dto_params.otg_inst = tg->inst; dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); + + phyd32clk = get_phyd32clk_src(link); + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); } else { if (dccg->funcs->enable_symclk_se) dccg->funcs->enable_symclk_se(dccg, stream_enc->stream_enc_inst, -- GitLab From 39079fe8e660851abbafa90cd55cbf029210661f Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 18 Jan 2024 15:14:15 -0500 Subject: [PATCH 0437/1405] drm/amd/display: fix incorrect mpc_combine array size [why] MAX_SURFACES is per stream, while MAX_PLANES is per asic. The mpc_combine is an array that records all the planes per asic. Therefore MAX_PLANES should be used as the array size. Using MAX_SURFACES causes array overflow when there are more than 3 planes. [how] Use the MAX_PLANES for the mpc_combine array size. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Reviewed-by: Nevenko Stupar Reviewed-by: Chaitanya Dhere Acked-by: Tom Chung Signed-off-by: Wenjing Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index b13a6fd7cc832..dd781a20692ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1112,7 +1112,7 @@ struct pipe_slice_table { struct pipe_ctx *pri_pipe; struct dc_plane_state *plane; int slice_count; - } mpc_combines[MAX_SURFACES]; + } mpc_combines[MAX_PLANES]; int mpc_combine_count; }; -- GitLab From 191cb4ed33a61c90feed8bda0f0df3a419604fc8 Mon Sep 17 00:00:00 2001 From: Nicholas Susanto Date: Thu, 18 Jan 2024 13:34:40 -0500 Subject: [PATCH 0438/1405] drm/amd/display: Underflow workaround by increasing SR exit latency [Why] On 14us for exit latency time causes underflow for 8K monitor with HDR on. Increasing the latency to 28us fixes the underflow. [How] Increase the latency to 28us. This workaround should be sufficient before we figure out why SR exit so long. Reviewed-by: Chaitanya Dhere Acked-by: Tom Chung Signed-off-by: Nicholas Susanto Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 32 +++++++++---------- .../drm/amd/display/dc/dml/dcn35/dcn35_fpu.c | 4 +-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 9c660d1facc76..14cec1c7b718c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -437,32 +437,32 @@ static struct wm_table ddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } @@ -474,32 +474,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index 475c4ec43c013..7ea2bd5374d51 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -164,8 +164,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 14.0, - .sr_enter_plus_exit_time_us = 16.0, + .sr_exit_time_us = 28.0, + .sr_enter_plus_exit_time_us = 30.0, .sr_exit_z8_time_us = 210.0, .sr_enter_plus_exit_z8_time_us = 320.0, .fclk_change_latency_us = 24.0, -- GitLab From faf51b201bc42adf500945732abb6220c707d6f3 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 11 Jan 2024 14:46:01 -0500 Subject: [PATCH 0439/1405] drm/amd/display: Fix dcn35 8k30 Underflow/Corruption Issue [why] odm calculation is missing for pipe split policy determination and cause Underflow/Corruption issue. [how] Add the odm calculation. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Fangzhi Zuo Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_translation_helper.c | 29 +++++++------------ .../gpu/drm/amd/display/dc/inc/core_types.h | 2 ++ 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 8b0f930be5ae1..23a608274096f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -791,35 +791,28 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -/*TODO no support for mpc combine, need rework - should calculate scaling params based on plane+stream*/ -static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, const struct dc_state *context) +static struct scaler_data get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context) { int i; - struct scaler_data data = { 0 }; + struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; + + memset(temp_pipe, 0, sizeof(struct pipe_ctx)); for (i = 0; i < MAX_PIPES; i++) { const struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; if (pipe->plane_state == in && !pipe->prev_odm_pipe) { - const struct pipe_ctx *next_pipe = pipe->next_odm_pipe; - - data = context->res_ctx.pipe_ctx[i].plane_res.scl_data; - while (next_pipe) { - data.h_active += next_pipe->plane_res.scl_data.h_active; - data.recout.width += next_pipe->plane_res.scl_data.recout.width; - if (in->rotation == ROTATION_ANGLE_0 || in->rotation == ROTATION_ANGLE_180) { - data.viewport.width += next_pipe->plane_res.scl_data.viewport.width; - } else { - data.viewport.height += next_pipe->plane_res.scl_data.viewport.height; - } - next_pipe = next_pipe->next_odm_pipe; - } + temp_pipe->stream = pipe->stream; + temp_pipe->plane_state = pipe->plane_state; + temp_pipe->plane_res.scl_data.taps = pipe->plane_res.scl_data.taps; + + resource_build_scaling_params(temp_pipe); break; } } ASSERT(i < MAX_PIPES); - return data; + return temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_stream_state *in) @@ -864,7 +857,7 @@ static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned out->ScalerEnabled[location] = false; } -static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, const struct dc_state *context) +static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out, unsigned int location, const struct dc_plane_state *in, struct dc_state *context) { const struct scaler_data scaler_data = get_scaler_data_for_plane(in, context); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index f74ae0d41d3c4..3a6bf77a68732 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -469,6 +469,8 @@ struct resource_context { unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; bool is_mpc_3dlut_acquired[MAX_PIPES]; + /* solely used for build scalar data in dml2 */ + struct pipe_ctx temp_pipe; }; struct dce_bw_output { -- GitLab From bb34bc2cd3ee284d7992df24a3f7d24f61a59268 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 5 Jan 2024 14:05:25 +0800 Subject: [PATCH 0440/1405] drm/amdgpu: Fix the warning info in mode1 reset Fix the warning info below during mode1 reset. [ +0.000004] Call Trace: [ +0.000004] [ +0.000006] ? show_regs+0x6e/0x80 [ +0.000011] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000005] ? __warn+0x91/0x150 [ +0.000009] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000006] ? report_bug+0x19d/0x1b0 [ +0.000013] ? handle_bug+0x46/0x80 [ +0.000012] ? exc_invalid_op+0x1d/0x80 [ +0.000011] ? asm_exc_invalid_op+0x1f/0x30 [ +0.000014] ? __flush_work.isra.0+0x2e8/0x390 [ +0.000007] ? __flush_work.isra.0+0x208/0x390 [ +0.000007] ? _prb_read_valid+0x216/0x290 [ +0.000008] __cancel_work_timer+0x11d/0x1a0 [ +0.000007] ? try_to_grab_pending+0xe8/0x190 [ +0.000012] cancel_work_sync+0x14/0x20 [ +0.000008] amddrm_sched_stop+0x3c/0x1d0 [amd_sched] [ +0.000032] amdgpu_device_gpu_recover+0x29a/0xe90 [amdgpu] This warning info was printed after applying the patch "drm/sched: Convert drm scheduler to use a work queue rather than kthread". The root cause is that amdgpu driver tries to use the uninitialized work_struct in the struct drm_gpu_scheduler v2: - Rename the function to amdgpu_ring_sched_ready and move it to amdgpu_ring.c (Alex) v3: - Fix a few more checks based on Vitaly's patch (Alex) v4: - squash in fix noticed by Bert in https://gitlab.freedesktop.org/drm/amd/-/issues/3139 Fixes: 11b3b9f461c5 ("drm/sched: Check scheduler ready before calling timeout handling") Reviewed-by: Alex Deucher Signed-off-by: Vitaly Prosyak Signed-off-by: Ma Jun Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 899e31e3a5e81..3a3f3ce09f00d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (!(ring && drm_sched_wqueue_ready(&ring->sched))) + if (!amdgpu_ring_sched_ready(ring)) continue; /* stop secheduler and drain ring. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index e485dd3357c63..1afbb2e932c6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_stop(&ring->sched); } @@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_wqueue_start(&ring->sched); } @@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || - !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring) || + !ring->funcs->preempt_ib) return -EINVAL; /* the last preemption failed */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 31b28e6f35b26..fdde7488d0ed9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5021,7 +5021,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; spin_lock(&ring->sched.job_list_lock); @@ -5160,7 +5160,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; /* Clear job fence from fence drv to avoid force_completion @@ -5627,7 +5627,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); @@ -5696,7 +5696,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); @@ -6051,7 +6051,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, NULL); @@ -6179,7 +6179,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 45424ebf96814..5505d646f43aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring) ring->name); ring->sched.ready = !r; + return r; } @@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); } + +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) +{ + if (!ring) + return false; + + if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bbb53720a0181..fe1a61eb6e4c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); #endif -- GitLab From 8ef85a0ce24a6d9322dfa2a67477e473c3619b4f Mon Sep 17 00:00:00 2001 From: David McFarland Date: Mon, 29 Jan 2024 18:18:22 -0400 Subject: [PATCH 0441/1405] drm/amd: Don't init MEC2 firmware when it fails to load The same calls are made directly above, but conditional on the firmware loading and validating successfully. Cc: stable@vger.kernel.org Fixes: 9931b67690cf ("drm/amd: Load GFX10 microcode during early_init") Signed-off-by: David McFarland Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ecb622b7f9709..dcdecb18b2306 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = 0; adev->gfx.mec2_fw = NULL; } - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); gfx_v10_0_check_fw_write_wait(adev); out: -- GitLab From 492a1e67ee59312b27c85c275298080fde392190 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jan 2024 14:06:43 +0530 Subject: [PATCH 0442/1405] drm/amd/display: Add NULL check for kzalloc in 'amdgpu_dm_atomic_commit_tail()' Add a NULL check for the kzalloc call that allocates memory for dummy_updates in the amdgpu_dm_atomic_commit_tail function. Previously, if kzalloc failed to allocate memory and returned NULL, the code would attempt to use the NULL pointer. The fix is to check if kzalloc returns NULL, and if so, log an error message and skip the rest of the current loop iteration with the continue statement. This prevents the code from attempting to use the NULL pointer. Cc: Julia Lawall Cc: Aurabindo Pillai Cc: Rodrigo Siqueira Cc: Alex Hung Cc: Alex Deucher Reported-by: Julia Lawall Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202401300629.ICnCt983-lkp@intel.com/ Fixes: 135fd1b35690 ("drm/amd/display: Reduce stack size") Signed-off-by: Srinivasan Shanmugam Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6cda5b5363626..d292f290cd6eb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9187,6 +9187,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * To fix this, DC should permit updating only stream properties. */ dummy_updates = kzalloc(sizeof(struct dc_surface_update) * MAX_SURFACES, GFP_ATOMIC); + if (!dummy_updates) { + DRM_ERROR("Failed to allocate memory for dummy_updates.\n"); + continue; + } for (j = 0; j < status->plane_count; j++) dummy_updates[j].surface = status->plane_states[0]; -- GitLab From 97cba232549b9fe7e491fb60a69cf93075015f29 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 Jan 2024 21:17:09 +0530 Subject: [PATCH 0443/1405] drm/amd/display: Fix buffer overflow in 'get_host_router_total_dp_tunnel_bw()' The error message buffer overflow 'dc->links' 12 <= 12 suggests that the code is trying to access an element of the dc->links array that is beyond its bounds. In C, arrays are zero-indexed, so an array with 12 elements has valid indices from 0 to 11. Trying to access dc->links[12] would be an attempt to access the 13th element of a 12-element array, which is a buffer overflow. To fix this, ensure that the loop does not go beyond the last valid index when accessing dc->links[i + 1] by subtracting 1 from the loop condition. This would ensure that i + 1 is always a valid index in the array. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_dpia_bw.c:208 get_host_router_total_dp_tunnel_bw() error: buffer overflow 'dc->links' 12 <= 12 Fixes: 59f1622a5f05 ("drm/amd/display: Add dpia display mode validation logic") Cc: PeiChen Huang Cc: Aric Cyr Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Meenakshikumar Somasundaram Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index dd0d2b206462c..5491b707cec88 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -196,7 +196,7 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in struct dc_link *link_dpia_primary, *link_dpia_secondary; int total_bw = 0; - for (uint8_t i = 0; i < MAX_PIPES * 2; ++i) { + for (uint8_t i = 0; i < (MAX_PIPES * 2) - 1; ++i) { if (!dc->links[i] || dc->links[i]->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) continue; -- GitLab From 16da399091dca3d1e48109086403587af37cc196 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 30 Jan 2024 12:10:38 +0530 Subject: [PATCH 0444/1405] drm/amdgpu: Fix missing error code in 'gmc_v6/7/8/9_0_hw_init()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return 0 for success scenairos in 'gmc_v6/7/8/9_0_hw_init()' Fixes the below: drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c:920 gmc_v6_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:1104 gmc_v7_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:1224 gmc_v8_0_hw_init() warn: missing error code? 'r' drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:2347 gmc_v9_0_hw_init() warn: missing error code? 'r' Fixes: fac4ebd79fed ("drm/amdgpu: Fix with right return code '-EIO' in 'amdgpu_gmc_vram_checking()'") Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 42e103d7077d5..59d9215e55562 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index efc16e580f1e2..45a2f8e031a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ff4ae73d27ecd..4422b27a3cc2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 17b7a25121b00..40a00ea0009f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2341,8 +2341,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** -- GitLab From 9c29282ecbeeb1b43fced3055c6a5bb244b9390b Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Thu, 11 Jan 2024 12:27:07 +0800 Subject: [PATCH 0445/1405] drm/amdkfd: reserve the BO before validating it Fix a warning. v2: Avoid unmapping attachment repeatedly when ERESTARTSYS. v3: Lock the BO before accessing ttm->sg to avoid race conditions.(Felix) [ 41.708711] WARNING: CPU: 0 PID: 1463 at drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.708989] Call Trace: [ 41.708992] [ 41.708996] ? show_regs+0x6c/0x80 [ 41.709000] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709008] ? __warn+0x93/0x190 [ 41.709014] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709024] ? report_bug+0x1f9/0x210 [ 41.709035] ? handle_bug+0x46/0x80 [ 41.709041] ? exc_invalid_op+0x1d/0x80 [ 41.709048] ? asm_exc_invalid_op+0x1f/0x30 [ 41.709057] ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu] [ 41.709185] ? ttm_bo_validate+0x146/0x1b0 [ttm] [ 41.709197] ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu] [ 41.709337] ? srso_alias_return_thunk+0x5/0x7f [ 41.709346] kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu] [ 41.709467] amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80 [amdgpu] [ 41.709586] kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu] [ 41.709710] kfd_ioctl+0x1ec/0x650 [amdgpu] [ 41.709822] ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10 [amdgpu] [ 41.709945] ? srso_alias_return_thunk+0x5/0x7f [ 41.709949] ? tomoyo_file_ioctl+0x20/0x30 [ 41.709959] __x64_sys_ioctl+0x9c/0xd0 [ 41.709967] do_syscall_64+0x3f/0x90 [ 41.709973] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 101b8104307e ("drm/amdkfd: Move dma unmapping after TLB flush") Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 20 ++++++++++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 +++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 937d0f0b21dfb..27c61c535e297 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -303,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f183d7faeeece..231fd927dcfbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2085,21 +2085,35 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( return ret; } -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) { struct kfd_mem_attachment *entry; struct amdgpu_vm *vm; + int ret; vm = drm_priv_to_vm(drm_priv); mutex_lock(&mem->lock); + ret = amdgpu_bo_reserve(mem->bo, true); + if (ret) + goto out; + list_for_each_entry(entry, &mem->attachments, list) { - if (entry->bo_va->base.vm == vm) - kfd_mem_dmaunmap_attachment(mem, entry); + if (entry->bo_va->base.vm != vm) + continue; + if (entry->bo_va->base.bo->tbo.ttm && + !entry->bo_va->base.bo->tbo.ttm->sg) + continue; + + kfd_mem_dmaunmap_attachment(mem, entry); } + amdgpu_bo_unreserve(mem->bo); +out: mutex_unlock(&mem->lock); + + return ret; } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ce4c52ec34d80..80e90fdef291d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ - amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); + if (err) + goto sync_memory_failed; } mutex_unlock(&p->mutex); -- GitLab From de4a733868df3a1b899fd4b05c32e92474cc8f73 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jan 2024 13:14:39 +0800 Subject: [PATCH 0446/1405] drm/amdgpu: drm/amdgpu: remove golden setting for gfx 11.5.0 No need to set GC golden settings in driver from gfx 11.5.0 onwards. Signed-off-by: Yifan Zhang Acked-by: Alex Deucher Reviewed-by: Lang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d9cf9fd03d300..4f3bfdc75b37d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) }; -static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a), - SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f) -}; - #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; - case IP_VERSION(11, 5, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_11_5_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0)); - break; default: break; } -- GitLab From 4f56acdee4c69224afde328bb6402a48b93f8221 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 30 Jan 2024 21:01:42 +0800 Subject: [PATCH 0447/1405] drm/amdgpu: remove asymmetrical irq disabling in vcn 4.0.5 suspend There is no irq enabled in vcn 4.0.5 resume, causing wrong amdgpu_irq_src status. Beside, current set function callbacks are empty with no real effect. Signed-off-by: Yifan Zhang Acked-by: Saleemkhan Jamadar Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 19 ------------------- 2 files changed, 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 169ed400ee7b7..8ab01ae919d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -2017,22 +2017,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta return ret; } -/** - * vcn_v4_0_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - /** * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state * @@ -2097,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ } static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = { - .set = vcn_v4_0_set_interrupt_state, .process = vcn_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 2eda30e78f61d..49e4c3c09acab 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle) vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); } return 0; @@ -1668,22 +1666,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s return ret; } -/** - * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - /** * vcn_v4_0_5_process_interrupt - process VCN block interrupt * @@ -1726,7 +1708,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp } static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { - .set = vcn_v4_0_5_set_interrupt_state, .process = vcn_v4_0_5_process_interrupt, }; -- GitLab From 7330256268664ea0a7dd5b07a3fed363093477dd Mon Sep 17 00:00:00 2001 From: Friedrich Vock Date: Tue, 23 Jan 2024 12:52:03 +0100 Subject: [PATCH 0448/1405] drm/amdgpu: Reset IH OVERFLOW_CLEAR bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to detect subsequent IH ring buffer overflows as well. Cc: Joshua Ashton Cc: Alex Deucher Cc: Christian König Cc: stable@vger.kernel.org Signed-off-by: Friedrich Vock Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/cz_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/si_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 6 ++++++ 10 files changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 6f7c031dd197a..f24e34dc33d1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37ad..c19681492efa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a6..2c02ae69883d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index d9ed7332d805d..ad4ad39f128f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -418,6 +418,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 8fb05eae340ad..b8da0fc29378c 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -418,6 +418,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e64b33115848d..de93614726c9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a57502..cada9f300a7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 917707bba7f36..450b6e8315091 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d364c6dd152c3..bf68e18e3824b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index ddfc6941f9d55..db66e6cccaf2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -421,6 +421,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } -- GitLab From ee36a3b345c433a846effcdcfba437c2298eeda5 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 29 Jan 2024 13:58:13 +0000 Subject: [PATCH 0449/1405] cifs: make sure that channel scaling is done only once Following a successful cifs_tree_connect, we have the code to scale up/down the number of channels in the session. However, it is not protected by a lock today. As a result, this code can be executed by several processes that select the same channel. The core functions handle this well, as they pick chan_lock. However, we've seen cases where smb2_reconnect throws some warnings. To fix that, this change introduces a flags bitmap inside the cifs_ses structure. A new flag type is used to ensure that only one process enters this section at any time. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 3 +++ fs/smb/client/smb2pdu.c | 20 +++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 16befff4cbb47..9093c507042fa 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1032,6 +1032,8 @@ struct cifs_chan { __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; +#define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) + /* * Session structure. One of these for each uid session with a particular host */ @@ -1064,6 +1066,7 @@ struct cifs_ses { enum securityEnum sectype; /* what security flavor was specified? */ bool sign; /* is signing required? */ bool domainAuto:1; + unsigned int flags; __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86f6f35b7f32e..6db54c6ef5719 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -399,6 +399,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, goto out; } + spin_lock(&ses->ses_lock); + if (ses->flags & CIFS_SES_FLAG_SCALE_CHANNELS) { + spin_unlock(&ses->ses_lock); + mutex_unlock(&ses->session_mutex); + goto skip_add_channels; + } + ses->flags |= CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); + if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { mutex_unlock(&ses->session_mutex); @@ -428,17 +437,22 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) + if (ses->chan_count == 1) { cifs_server_dbg(VFS, "supports multichannel now\n"); + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } cifs_try_adding_channels(ses); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); } } else { mutex_unlock(&ses->session_mutex); } + skip_add_channels: + spin_lock(&ses->ses_lock); + ses->flags &= ~CIFS_SES_FLAG_SCALE_CHANNELS; + spin_unlock(&ses->ses_lock); if (smb2_command != SMB2_INTERNAL_CMD) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); -- GitLab From e028243003ad6e1417cc8ac19af9ded672b9ec59 Mon Sep 17 00:00:00 2001 From: Andrew Halaney Date: Mon, 29 Jan 2024 11:12:11 -0600 Subject: [PATCH 0450/1405] MAINTAINERS: Drop unreachable reviewer for Qualcomm ETHQOS ethernet driver Bhupesh's email responds indicating they've changed employers and with no new contact information. Let's drop the line from MAINTAINERS to avoid getting the same response over and over. Signed-off-by: Andrew Halaney Link: https://lore.kernel.org/r/20240129-remove-dwmac-qcom-ethqos-reviewer-v1-1-2645eab61451@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9435ea0f8cda0..6ae30c50e79fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18085,7 +18085,6 @@ F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER M: Vinod Koul -R: Bhupesh Sharma L: netdev@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained -- GitLab From 5dee6d6923458e26966717f2a3eae7d09fc10bf6 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Mon, 29 Jan 2024 17:10:17 +0800 Subject: [PATCH 0451/1405] net: ipv4: fix a memleak in ip_setup_cork When inetdev_valid_mtu fails, cork->opt should be freed if it is allocated in ip_setup_cork. Otherwise there could be a memleak. Fixes: 501a90c94510 ("inet: protect against too small mtu values.") Signed-off-by: Zhipeng Lu Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240129091017.2938835-1-alexious@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b06f678b03a19..41537d18eecfd 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1287,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1303,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; -- GitLab From 585b40e25dc9ff3d2b03d1495150540849009e5b Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 29 Jan 2024 23:49:48 +0100 Subject: [PATCH 0452/1405] net: dsa: mv88e6xxx: Fix failed probe due to unsupported C45 reads Not all mv88e6xxx device support C45 read/write operations. Those which do not return -EOPNOTSUPP. However, when phylib scans the bus, it considers this fatal, and the probe of the MDIO bus fails, which in term causes the mv88e6xxx probe as a whole to fail. When there is no device on the bus for a given address, the pull up resistor on the data line results in the read returning 0xffff. The phylib core code understands this when scanning for devices on the bus. C45 allows multiple devices to be supported at one address, so phylib will perform a few reads at each address, so although thought not the most efficient solution, it is a way to avoid fatal errors. Make use of this as a minimal fix for stable to fix the probing problems. Follow up patches will rework how C45 operates to make it similar to C22 which considers -ENODEV as a none-fatal, and swap mv88e6xxx to using this. Cc: stable@vger.kernel.org Fixes: 743a19e38d02 ("net: dsa: mv88e6xxx: Separate C22 and C45 transactions") Reported-by: Tim Menninger Signed-off-by: Andrew Lunn Link: https://lore.kernel.org/r/20240129224948.1531452-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 383b3c4d6f599..614cabb5c1b03 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad, int err; if (!chip->info->ops->phy_read_c45) - return -EOPNOTSUPP; + return 0xffff; mv88e6xxx_reg_lock(chip); err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val); -- GitLab From f9ddefb6c0de771038b041eaad5cc15e61fe283f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:45 +0100 Subject: [PATCH 0453/1405] nvme-auth: open-code single-use macros No point in having macros just for a single function nvme_auth_submit(). Open-code them into the caller. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 2a12ee878783f..3dce480d932e3 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -48,11 +48,6 @@ struct nvme_dhchap_queue_context { static struct workqueue_struct *nvme_auth_wq; -#define nvme_auth_flags_from_qid(qid) \ - (qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED -#define nvme_auth_queue_from_qid(ctrl, qid) \ - (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q - static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl) { return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues + @@ -63,10 +58,15 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - blk_mq_req_flags_t flags = nvme_auth_flags_from_qid(qid); - struct request_queue *q = nvme_auth_queue_from_qid(ctrl, qid); + blk_mq_req_flags_t flags = 0; + struct request_queue *q = ctrl->fabrics_q; int ret; + if (qid != 0) { + flags |= BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED; + q = ctrl->connect_q; + } + cmd.auth_common.opcode = nvme_fabrics_command; cmd.auth_common.secp = NVME_AUTH_DHCHAP_PROTOCOL_IDENTIFIER; cmd.auth_common.spsp0 = 0x01; -- GitLab From bd2687f2e5940f6ee14bfae787b3c1f5f0574907 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:46 +0100 Subject: [PATCH 0454/1405] nvme: change __nvme_submit_sync_cmd() calling conventions Combine the two arguments 'flags' and 'at_head' from __nvme_submit_sync_cmd() into a single 'flags' argument and use function-specific values to indicate what should be set within the function. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 7 +++---- drivers/nvme/host/core.c | 19 ++++++++++++------- drivers/nvme/host/fabrics.c | 18 +++++++++++------- drivers/nvme/host/nvme.h | 17 +++++++++++++++-- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 3dce480d932e3..bf69be8966f44 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -58,12 +58,12 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - blk_mq_req_flags_t flags = 0; + nvme_submit_flags_t flags = 0; struct request_queue *q = ctrl->fabrics_q; int ret; if (qid != 0) { - flags |= BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED; + flags |= NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED; q = ctrl->connect_q; } @@ -80,8 +80,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, } ret = __nvme_submit_sync_cmd(q, &cmd, NULL, data, data_len, - qid == 0 ? NVME_QID_ANY : qid, - 0, flags); + qid == 0 ? NVME_QID_ANY : qid, flags); if (ret > 0) dev_warn(ctrl->device, "qid %d auth_send failed with status %d\n", qid, ret); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a42c347d08e87..aed099a457911 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1051,15 +1051,20 @@ EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU); */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, blk_mq_req_flags_t flags) + int qid, nvme_submit_flags_t flags) { struct request *req; int ret; + blk_mq_req_flags_t blk_flags = 0; + if (flags & NVME_SUBMIT_NOWAIT) + blk_flags |= BLK_MQ_REQ_NOWAIT; + if (flags & NVME_SUBMIT_RESERVED) + blk_flags |= BLK_MQ_REQ_RESERVED; if (qid == NVME_QID_ANY) - req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); + req = blk_mq_alloc_request(q, nvme_req_op(cmd), blk_flags); else - req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), blk_flags, qid - 1); if (IS_ERR(req)) @@ -1072,7 +1077,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - ret = nvme_execute_rq(req, at_head); + ret = nvme_execute_rq(req, flags & NVME_SUBMIT_AT_HEAD); if (result && ret >= 0) *result = nvme_req(req)->result; out: @@ -1085,7 +1090,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -1560,7 +1565,7 @@ static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, NVME_QID_ANY, 0, 0); + buffer, buflen, NVME_QID_ANY, 0); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -2172,7 +2177,7 @@ static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t l cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - NVME_QID_ANY, 1, 0); + NVME_QID_ANY, NVME_SUBMIT_AT_HEAD); } static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 373ed08e6b920..3499acbf6a822 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -450,8 +450,10 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, - data, sizeof(*data), NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), NVME_QID_ANY, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_NOWAIT | + NVME_SUBMIT_RESERVED); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -525,8 +527,10 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, - data, sizeof(*data), qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + data, sizeof(*data), qid, + NVME_SUBMIT_AT_HEAD | + NVME_SUBMIT_RESERVED | + NVME_SUBMIT_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1700063bc24de..7d315b670e537 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -837,12 +837,25 @@ static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); } +/* + * Flags for __nvme_submit_sync_cmd() + */ +typedef __u32 __bitwise nvme_submit_flags_t; + +enum { + /* Insert request at the head of the queue */ + NVME_SUBMIT_AT_HEAD = (__force nvme_submit_flags_t)(1 << 0), + /* Set BLK_MQ_REQ_NOWAIT when allocating request */ + NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1), + /* Set BLK_MQ_REQ_RESERVED when allocating request */ + NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2), +}; + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, - int qid, int at_head, - blk_mq_req_flags_t flags); + int qid, nvme_submit_flags_t flags); int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result); -- GitLab From 48dae46676d1acb632a3e1e14d02879d57618b5d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 29 Jan 2024 07:39:48 +0100 Subject: [PATCH 0455/1405] nvme: enable retries for authentication commands Authentication commands might trigger a lengthy computation on the controller or even a callout to an external entity. In these cases the controller might return a status without the DNR bit set, indicating that the command should be retried. This patch enables retries for authentication commands by setting NVME_SUBMIT_RETRY for __nvme_submit_sync_cmd(). Reported-by: Martin George Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 2 +- drivers/nvme/host/core.c | 2 ++ drivers/nvme/host/nvme.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bf69be8966f44..a264b3ae078b8 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -58,7 +58,7 @@ static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid, void *data, size_t data_len, bool auth_send) { struct nvme_command cmd = {}; - nvme_submit_flags_t flags = 0; + nvme_submit_flags_t flags = NVME_SUBMIT_RETRY; struct request_queue *q = ctrl->fabrics_q; int ret; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index aed099a457911..8b48b7f7871ad 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1070,6 +1070,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, if (IS_ERR(req)) return PTR_ERR(req); nvme_init_request(req, cmd); + if (flags & NVME_SUBMIT_RETRY) + req->cmd_flags &= ~REQ_FAILFAST_DRIVER; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7d315b670e537..945bf15dccec6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -849,6 +849,8 @@ enum { NVME_SUBMIT_NOWAIT = (__force nvme_submit_flags_t)(1 << 1), /* Set BLK_MQ_REQ_RESERVED when allocating request */ NVME_SUBMIT_RESERVED = (__force nvme_submit_flags_t)(1 << 2), + /* Retry command when NVME_SC_DNR is not set in the result */ + NVME_SUBMIT_RETRY = (__force nvme_submit_flags_t)(1 << 3), }; int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, -- GitLab From 0945b43b4ef8833d73daf5d057d07b64a23b4220 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 31 Jan 2024 15:01:38 -0800 Subject: [PATCH 0456/1405] nvme-common: add module description Add MODULE_DESCRIPTION() in order to remove warnings & get clean build:- WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/common/nvme-auth.o WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/nvme/common/nvme-keyring.o Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 1 + drivers/nvme/common/keyring.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index a23ab5c968b94..a3455f1d67fae 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -471,4 +471,5 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key) } EXPORT_SYMBOL_GPL(nvme_auth_generate_key); +MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index a5c0431c101cf..6f7e7a8fa5ae4 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -181,5 +181,6 @@ static void __exit nvme_keyring_exit(void) MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Hannes Reinecke "); +MODULE_DESCRIPTION("NVMe Keyring implementation"); module_init(nvme_keyring_init); module_exit(nvme_keyring_exit); -- GitLab From 4b6821940eeb238a0cc9af322e9ebe8e12613f6a Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:11 -0700 Subject: [PATCH 0457/1405] nvme: return string as char *, not unsigned char * The functions in drivers/nvme/host/constants.c returning human-readable status and opcode strings currently use type "const unsigned char *". Typically string constants use type "const char *", so remove "unsigned" from the return types. This is a purely cosmetic change to clarify that the functions return text strings instead of an array of bytes, for example. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/constants.c | 8 ++++---- drivers/nvme/host/nvme.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 20f46c230885c..8791283ec6ad2 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -171,7 +171,7 @@ static const char * const nvme_statuses[] = { [NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command", }; -const unsigned char *nvme_get_error_status_str(u16 status) +const char *nvme_get_error_status_str(u16 status) { status &= 0x7ff; if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) @@ -179,7 +179,7 @@ const unsigned char *nvme_get_error_status_str(u16 status) return "Unknown"; } -const unsigned char *nvme_get_opcode_str(u8 opcode) +const char *nvme_get_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_ops) && nvme_ops[opcode]) return nvme_ops[opcode]; @@ -187,7 +187,7 @@ const unsigned char *nvme_get_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_opcode_str); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +const char *nvme_get_admin_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_admin_ops) && nvme_admin_ops[opcode]) return nvme_admin_ops[opcode]; @@ -195,7 +195,7 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode) } EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) { +const char *nvme_get_fabrics_opcode_str(u8 opcode) { if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode]) return nvme_fabrics_ops[opcode]; return "Unknown"; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 945bf15dccec6..acdf0e5e7e9aa 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1140,31 +1140,31 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl) } #ifdef CONFIG_NVME_VERBOSE_ERRORS -const unsigned char *nvme_get_error_status_str(u16 status); -const unsigned char *nvme_get_opcode_str(u8 opcode); -const unsigned char *nvme_get_admin_opcode_str(u8 opcode); -const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode); +const char *nvme_get_error_status_str(u16 status); +const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); #else /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_get_error_status_str(u16 status) +static inline const char *nvme_get_error_status_str(u16 status) { return "I/O Error"; } -static inline const unsigned char *nvme_get_opcode_str(u8 opcode) +static inline const char *nvme_get_opcode_str(u8 opcode) { return "I/O Cmd"; } -static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode) +static inline const char *nvme_get_admin_opcode_str(u8 opcode) { return "Admin Cmd"; } -static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) { return "Fabrics Cmd"; } #endif /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +static inline const char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) { if (opcode == nvme_fabrics_command) return nvme_get_fabrics_opcode_str(fctype); -- GitLab From 6f9a71c61183d6849d5aeab085b210c10398af9a Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:12 -0700 Subject: [PATCH 0458/1405] nvme: remove redundant status mask In nvme_get_error_status_str(), the status code is already masked with 0x7ff at the beginning of the function. Don't bother masking it again when indexing nvme_statuses. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/constants.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index 8791283ec6ad2..6f2ebb5fcdb05 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -175,7 +175,7 @@ const char *nvme_get_error_status_str(u16 status) { status &= 0x7ff; if (status < ARRAY_SIZE(nvme_statuses) && nvme_statuses[status]) - return nvme_statuses[status & 0x7ff]; + return nvme_statuses[status]; return "Unknown"; } -- GitLab From f9e9115d0c014dec3278d68823eaff159f98f4d6 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:13 -0700 Subject: [PATCH 0459/1405] nvme: take const cmd pointer in read-only helpers nvme_is_fabrics() and nvme_is_write() only read struct nvme_command, so take it by const pointer. This allows callers to pass a const pointer and communicates that these functions don't modify the command. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 68eff8c86ce34..bc605ec4a3fd0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1812,7 +1812,7 @@ struct nvme_command { }; }; -static inline bool nvme_is_fabrics(struct nvme_command *cmd) +static inline bool nvme_is_fabrics(const struct nvme_command *cmd) { return cmd->common.opcode == nvme_fabrics_command; } @@ -1831,7 +1831,7 @@ struct nvme_error_slot { __u8 resv2[24]; }; -static inline bool nvme_is_write(struct nvme_command *cmd) +static inline bool nvme_is_write(const struct nvme_command *cmd) { /* * What a mess... -- GitLab From d8f5df1fcea54923b74558035b8de8fb2da3e816 Mon Sep 17 00:00:00 2001 From: Mohammad Nassiri Date: Tue, 30 Jan 2024 03:51:52 +0000 Subject: [PATCH 0460/1405] selftests/net: Argument value mismatch when calling verify_counters() The end_server() function only operates in the server thread and always takes an accept socket instead of a listen socket as its input argument. To align with this, invert the boolean values used when calling verify_counters() within the end_server() function. As a result of this typo, the test didn't correctly check for the non-symmetrical scenario, where i.e. peer-A uses a key <100:200> to send data, but peer-B uses another key <105:205> to send its data. So, in simple words, different keys for TX and RX. Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Mohammad Nassiri Link: https://lore.kernel.org/all/934627c5-eebb-4626-be23-cfb134c01d1a@arista.com/ [amended 'Fixes' tag, added the issue description and carried-over to lkml] Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-1-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/key-management.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index c48b4970ca17e..f6a9395e3cd7d 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -843,7 +843,7 @@ static void end_server(const char *tst_name, int sk, synchronize_threads(); /* 4: verified => closed */ close(sk); - verify_counters(tst_name, true, false, begin, &end); + verify_counters(tst_name, false, true, begin, &end); synchronize_threads(); /* 5: counters */ } -- GitLab From 384aa16d3776a9ca5c0c1f1e7af4030fe176a993 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:53 +0000 Subject: [PATCH 0461/1405] selftests/net: Rectify key counters checks As the names of (struct test_key) members didn't reflect whether the key was used for TX or RX, the verification for the counters was done incorrectly for asymmetrical selftests. Rename these with _tx appendix and fix checks in verify_counters(). While at it, as the checks are now correct, introduce skip_counters_checks, which is intended for tests where it's expected that a key that was set with setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, ...) might had no chance of getting used on the wire. Fixes the following failures, exposed by the previous commit: > not ok 51 server: Check current != rnext keys set before connect(): Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 52 server: Check current != rnext keys set before connect(): Counter pkt_good was not expected to increase 0 => 21 for key 137:10 > > not ok 63 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was expected to increase 0 => 0 for key 132:5 > not ok 64 server: Check current flapping back on peer's RnextKey request: Counter pkt_good was not expected to increase 0 => 40 for key 137:10 Cc: Mohammad Nassiri Fixes: 3c3ead555648 ("selftests/net: Add TCP-AO key-management test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-2-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- .../selftests/net/tcp_ao/key-management.c | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index f6a9395e3cd7d..24e62120b7924 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -417,9 +417,9 @@ struct test_key { matches_vrf : 1, is_current : 1, is_rnext : 1, - used_on_handshake : 1, - used_after_accept : 1, - used_on_client : 1; + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; }; struct key_collection { @@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port) addr = &this_ip_dest; sndid = key->client_keyid; rcvid = key->server_keyid; - set_current = key->is_current; - set_rnext = key->is_rnext; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; } if (test_add_key_cr(sk, key->password, key->len, *addr, vrf, sndid, rcvid, key->maclen, key->alg, set_current, set_rnext)) test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); - if (set_current || set_rnext) - key->used_on_handshake = 1; #ifdef DEBUG test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", server ? "server" : "client", i, collection.nr_keys, @@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; uint8_t sndid, rcvid; - bool was_used; + bool rx_cnt_expected; + if (key->skip_counters_checks) + continue; if (server) { sndid = key->server_keyid; rcvid = key->client_keyid; - if (is_listen_sk) - was_used = key->used_on_handshake; - else - was_used = key->used_after_accept; + rx_cnt_expected = key->used_on_client_tx; } else { sndid = key->client_keyid; rcvid = key->server_keyid; - was_used = key->used_on_client; + rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, sndid, rcvid); } test_tcp_ao_counters_free(a); @@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port, current_index = nr_keys - 1; if (rnext_index < 0) rnext_index = nr_keys - 1; - collection.keys[current_index].used_on_handshake = 1; - collection.keys[rnext_index].used_after_accept = 1; - collection.keys[rnext_index].used_on_client = 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { @@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port, test_error("Can't change the current key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); - collection.keys[rotate_to_index].used_after_accept = 1; + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); } @@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, } verify_current_rnext(tst_name, sk, -1, collection.keys[i].server_keyid); - collection.keys[i].used_on_client = 1; + collection.keys[i].used_on_server_tx = 1; synchronize_threads(); /* verify current/rnext */ } end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); -- GitLab From 6caf3adcc877b3470e98133d52b360ebe5f7a6a3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 30 Jan 2024 03:51:54 +0000 Subject: [PATCH 0462/1405] selftests/net: Repair RST passive reset selftest Currently, the test is racy and seems to not pass anymore. In order to rectify it, aim on TCP_TW_RST. Doesn't seem way too good with this sleep() part, but it seems as a reasonable compromise for the test. There is a plan in-line comment on how-to improve it, going to do it on the top, at this moment I want it to run on netdev/patchwork selftests dashboard. It also slightly changes tcp_ao-lib in order to get SO_ERROR propagated to test_client_verify() return value. Fixes: c6df7b2361d7 ("selftests/net: Add TCP-AO RST test") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240130-tcp-ao-test-key-mgmt-v2-3-d190430a6c60@arista.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tcp_ao/lib/sock.c | 12 +- tools/testing/selftests/net/tcp_ao/rst.c | 138 ++++++++++++------ 2 files changed, 98 insertions(+), 52 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index c75d82885a2e1..15aeb0963058f 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write) return -ETIMEDOUT; } - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return -errno; + if (ret) return -ret; return 0; } @@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr, { size_t buf_sz = msg_len * nr; char *buf = alloca(buf_sz); + ssize_t ret; randomize_buffer(buf, buf_sz); - if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) - return -1; - return 0; + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index ac06009a7f5f6..7df8b8700e39e 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -1,10 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 -/* Author: Dmitry Safonov */ +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. + * + * Author: Dmitry Safonov + */ #include #include "../../../../include/linux/kernel.h" #include "aolib.h" const size_t quota = 1000; +const size_t packet_sz = 100; /* * Backlog == 0 means 1 connection in queue, see: * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") @@ -59,26 +82,6 @@ static void close_forced(int sk) close(sk); } -static int test_wait_for_exception(int sk, time_t sec) -{ - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; - fd_set efds; - int ret; - - FD_ZERO(&efds); - FD_SET(sk, &efds); - - if (sec) - ptv = &tv; - - errno = 0; - ret = select(sk + 1, NULL, NULL, &efds, ptv); - if (ret < 0) - return -errno; - return ret ? sk : 0; -} - static void test_server_active_rst(unsigned int port) { struct tcp_ao_counters cnt1, cnt2; @@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port) test_fail("server returned %zd", bytes); } - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ + synchronize_threads(); /* 4: close the server, creating twsk */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - - synchronize_threads(); /* 4: terminate server + send more on client */ - bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); close(sk); + + synchronize_threads(); /* 5: restore the socket, send more data */ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); - synchronize_threads(); /* 5: verified => closed */ - close(sk); + synchronize_threads(); /* 6: server exits */ } static void *server_fn(void *arg) @@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port) struct tcp_sock_state img; sockaddr_af saddr; int sk, err; - socklen_t slen = sizeof(err); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ test_enable_repair(sk); test_sock_checkpoint(sk, &img, &saddr); test_ao_checkpoint(sk, &ao_img); - test_kill_sk(sk); + test_disable_repair(sk); - img.out.seq += quota; + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. + * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port) test_disable_repair(sk); test_sock_state_free(&img); - synchronize_threads(); /* 4: terminate server + send more on client */ - if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) - test_ok("client connection broken post-seq-adjust"); - else - test_fail("client connection still works post-seq-adjust"); - - test_wait_for_exception(sk, TEST_TIMEOUT_SEC); - - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) - test_error("getsockopt()"); - if (err != ECONNRESET && err != EPIPE) - test_fail("client connection was not reset: %d", err); + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); else - test_ok("client connection was reset"); + test_fail("client sock is yet connected post-seq-adjust"); if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - synchronize_threads(); /* 5: verified => closed */ + synchronize_threads(); /* 6: server exits */ close(sk); test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); } @@ -410,6 +452,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(15, server_fn, client_fn); + test_init(14, server_fn, client_fn); return 0; } -- GitLab From 7d23e836b00eec96610141549f59e5902dc55fe8 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:14 -0700 Subject: [PATCH 0463/1405] nvme: split out fabrics version of nvme_opcode_str() nvme_opcode_str() currently supports admin, IO, and fabrics commands. However, fabrics commands aren't allowed for the pci transport. Currently the pci caller passes 0 as the fctype, which means any fabrics command would be displayed as "Property Set". Move fabrics command support into a function nvme_fabrics_opcode_str() and remove the fctype argument to nvme_opcode_str(). This way, a fabrics command will display as "Unknown" for pci. Convert the rdma and tcp transports to use nvme_fabrics_opcode_str(). Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 13 ++++++++++--- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 7 +++---- drivers/nvme/host/tcp.c | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index acdf0e5e7e9aa..b70b333a0874f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1164,11 +1164,18 @@ static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) } #endif /* CONFIG_NVME_VERBOSE_ERRORS */ -static inline const char *nvme_opcode_str(int qid, u8 opcode, u8 fctype) +static inline const char *nvme_opcode_str(int qid, u8 opcode) { - if (opcode == nvme_fabrics_command) - return nvme_get_fabrics_opcode_str(fctype); return qid ? nvme_get_opcode_str(opcode) : nvme_get_admin_opcode_str(opcode); } + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 25eb727795417..e6267a6aa3801 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1349,7 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) dev_warn(dev->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", req->tag, nvme_cid(req), opcode, - nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); + nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3f393ee202811..6adf2c19a7120 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1951,14 +1951,13 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct nvme_rdma_ctrl *ctrl = queue->ctrl; - u8 opcode = req->req.cmd->common.opcode; - u8 fctype = req->req.cmd->fabrics.fctype; + struct nvme_command *cmd = req->req.cmd; int qid = nvme_rdma_queue_idx(queue); dev_warn(ctrl->ctrl.device, "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), opcode, - nvme_opcode_str(qid, opcode, fctype), qid); + rq->tag, nvme_cid(rq), cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4393cf244025e..9f8dea2e2c7d5 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2428,13 +2428,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); - u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; + struct nvme_command *cmd = &pdu->cmd; int qid = nvme_tcp_queue_id(req->queue); dev_warn(ctrl->device, "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", - rq->tag, nvme_cid(rq), pdu->hdr.type, opc, - nvme_opcode_str(qid, opc, fctype), qid); + rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, + nvme_fabrics_opcode_str(qid, cmd), qid); if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { /* -- GitLab From 0d150beff26e636aa07ab889133a0015eaee4227 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Wed, 31 Jan 2024 09:43:15 -0700 Subject: [PATCH 0464/1405] nvme-fc: log human-readable opcode on timeout The fc transport logs the opcode and fctype on command timeout. This is sufficient information to identify the command issued, but not very human-readable. Use the nvme_fabrics_opcode_str() helper to also log the name of the command, as rdma and tcp already do. Signed-off-by: Caleb Sander Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e2308119f8f0b..63a2e2839a789 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2574,6 +2574,7 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; + u16 qnum = op->queue->qnum; struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; @@ -2582,10 +2583,11 @@ static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) * will detect the aborted io and will fail the connection. */ dev_info(ctrl->ctrl.device, - "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: " + "NVME-FC{%d.%d}: io timeout: opcode %d fctype %d (%s) w10/11: " "x%08x/x%08x\n", - ctrl->cnum, op->queue->qnum, sqe->common.opcode, - sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11); + ctrl->cnum, qnum, sqe->common.opcode, sqe->fabrics.fctype, + nvme_fabrics_opcode_str(qnum, sqe), + sqe->common.cdw10, sqe->common.cdw11); if (__nvme_fc_abort_op(ctrl, op)) nvme_fc_error_recovery(ctrl, "io timeout abort failed"); -- GitLab From 4d322dce82a1d44f8c83f0f54f95dd1b8dcf46c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Jan 2024 18:42:35 +0000 Subject: [PATCH 0465/1405] af_unix: fix lockdep positive in sk_diag_dump_icons() syzbot reported a lockdep splat [1]. Blamed commit hinted about the possible lockdep violation, and code used unix_state_lock_nested() in an attempt to silence lockdep. It is not sufficient, because unix_state_lock_nested() is already used from unix_state_double_lock(). We need to use a separate subclass. This patch adds a distinct enumeration to make things more explicit. Also use swap() in unix_state_double_lock() as a clean up. v2: add a missing inline keyword to unix_state_lock_nested() [1] WARNING: possible circular locking dependency detected 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Not tainted syz-executor.1/2542 is trying to acquire lock: ffff88808b5df9e8 (rlock-AF_UNIX){+.+.}-{2:2}, at: skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 but task is already holding lock: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&u->lock/1){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 _raw_spin_lock_nested+0x31/0x40 kernel/locking/spinlock.c:378 sk_diag_dump_icons net/unix/diag.c:87 [inline] sk_diag_fill+0x6ea/0xfe0 net/unix/diag.c:157 sk_diag_dump net/unix/diag.c:196 [inline] unix_diag_dump+0x3e9/0x630 net/unix/diag.c:220 netlink_dump+0x5c1/0xcd0 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5d7/0x780 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] unix_diag_handler_dump+0x1c3/0x8f0 net/unix/diag.c:319 sock_diag_rcv_msg+0xe3/0x400 netlink_rcv_skb+0x1df/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7e6/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa37/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] sock_write_iter+0x39a/0x520 net/socket.c:1160 call_write_iter include/linux/fs.h:2085 [inline] new_sync_write fs/read_write.c:497 [inline] vfs_write+0xa74/0xca0 fs/read_write.c:590 ksys_write+0x1a0/0x2c0 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b -> #0 (rlock-AF_UNIX){+.+.}-{2:2}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&u->lock/1); lock(rlock-AF_UNIX); lock(&u->lock/1); lock(rlock-AF_UNIX); *** DEADLOCK *** 1 lock held by syz-executor.1/2542: #0: ffff88808b5dfe70 (&u->lock/1){+.+.}-{2:2}, at: unix_dgram_sendmsg+0xfc7/0x2200 net/unix/af_unix.c:2089 stack backtrace: CPU: 1 PID: 2542 Comm: syz-executor.1 Not tainted 6.8.0-rc1-syzkaller-00356-g8a696a29c690 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 check_noncircular+0x366/0x490 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x1909/0x5ab0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xd5/0x120 kernel/locking/spinlock.c:162 skb_queue_tail+0x36/0x120 net/core/skbuff.c:3863 unix_dgram_sendmsg+0x15d9/0x2200 net/unix/af_unix.c:2112 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x592/0x890 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmmsg+0x3b2/0x730 net/socket.c:2724 __do_sys_sendmmsg net/socket.c:2753 [inline] __se_sys_sendmmsg net/socket.c:2750 [inline] __x64_sys_sendmmsg+0xa0/0xb0 net/socket.c:2750 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf5/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f26d887cda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f26d95a60c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f26d89abf80 RCX: 00007f26d887cda9 RDX: 000000000000003e RSI: 00000000200bd000 RDI: 0000000000000004 RBP: 00007f26d88c947a R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000008c0 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f26d89abf80 R15: 00007ffcfe081a68 Fixes: 2aac7a2cb0d9 ("unix_diag: Pending connections IDs NLA") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240130184235.1620738-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 20 ++++++++++++++------ net/unix/af_unix.c | 14 ++++++-------- net/unix/diag.c | 2 +- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8a..afd40dce40f3d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -46,12 +46,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -77,6 +71,20 @@ struct unix_sock { #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ac1f2bc18fc96..30b178ebba60a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1344,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1591,7 +1589,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44c..be19827eca36d 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); -- GitLab From d9407ff11809c6812bb84fe7be9c1367d758e5c8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:30 -0800 Subject: [PATCH 0466/1405] pds_core: Prevent health thread from running during reset/remove The PCIe reset handlers can run at the same time as the health thread. This can cause the health thread to stomp on the PCIe reset. Fix this by preventing the health thread from running while a PCIe reset is happening. As part of this use timer_shutdown_sync() during reset and remove to make sure the timer doesn't ever get rearmed. Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-2-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 3080898d7b95b..5172a5ad8ec6d 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -293,7 +293,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); @@ -420,7 +420,7 @@ static void pdsc_remove(struct pci_dev *pdev) */ pdsc_sriov_configure(pdev, 0); - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); @@ -445,10 +445,24 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +static void pdsc_stop_health_thread(struct pdsc *pdsc) +{ + timer_shutdown_sync(&pdsc->wdtimer); + if (pdsc->health_work.func) + cancel_work_sync(&pdsc->health_work); +} + +static void pdsc_restart_health_thread(struct pdsc *pdsc) +{ + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + mod_timer(&pdsc->wdtimer, jiffies + 1); +} + void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); + pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); pci_free_irq_vectors(pdev); @@ -486,6 +500,7 @@ void pdsc_reset_done(struct pci_dev *pdev) } pdsc_fw_up(pdsc); + pdsc_restart_health_thread(pdsc); } static const struct pci_error_handlers pdsc_err_handler = { -- GitLab From d321067e2cfa4d5e45401a00912ca9da8d1af631 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:31 -0800 Subject: [PATCH 0467/1405] pds_core: Cancel AQ work on teardown There is a small window where pdsc_work_thread() calls pdsc_process_adminq() and pdsc_process_adminq() passes the PDSC_S_STOPPING_DRIVER check and starts to process adminq/notifyq work and then the driver starts a fw_down cycle. This could cause some undefined behavior if the notifyqcq/adminqcq are free'd while pdsc_process_adminq() is running. Use cancel_work_sync() on the adminqcq's work struct to make sure any pending work items are cancelled and any in progress work items are completed. Also, make sure to not call cancel_work_sync() if the work item has not be initialized. Without this, traces will happen in cases where a reset fails and teardown is called again or if reset fails and the driver is removed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-3-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0d2091e9eb283..b582729331ebf 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -464,6 +464,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) if (!pdsc->pdev->is_virtfn) pdsc_devcmd_reset(pdsc); + if (pdsc->adminqcq.work.func) + cancel_work_sync(&pdsc->adminqcq.work); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); -- GitLab From 951705151e50f9022bc96ec8b3fd5697380b1df6 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:32 -0800 Subject: [PATCH 0468/1405] pds_core: Use struct pdsc for the pdsc_adminq_isr private data The initial design for the adminq interrupt was done based on client drivers having their own adminq and adminq interrupt. So, each client driver's adminq isr would use their specific adminqcq for the private data struct. For the time being the design has changed to only use a single adminq for all clients. So, instead use the struct pdsc for the private data to simplify things a bit. This also has the benefit of not dereferencing the adminqcq to access the pdsc struct when the PDSC_S_STOPPING_DRIVER bit is set and the adminqcq has actually been cleared/freed. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-4-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 5 +++-- drivers/net/ethernet/amd/pds_core/core.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5beadabc21361..68be5ea251fc1 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -135,8 +135,8 @@ void pdsc_work_thread(struct work_struct *work) irqreturn_t pdsc_adminq_isr(int irq, void *data) { - struct pdsc_qcq *qcq = data; - struct pdsc *pdsc = qcq->pdsc; + struct pdsc *pdsc = data; + struct pdsc_qcq *qcq; /* Don't process AdminQ when shutting down */ if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { @@ -145,6 +145,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) return IRQ_HANDLED; } + qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index b582729331ebf..0356e56a6e99e 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -125,7 +125,7 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq) snprintf(name, sizeof(name), "%s-%d-%s", PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name); - index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq); + index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc); if (index < 0) return index; qcq->intx = index; -- GitLab From 7e82a8745b951b1e794cc780d46f3fbee5e93447 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:33 -0800 Subject: [PATCH 0469/1405] pds_core: Prevent race issues involving the adminq There are multiple paths that can result in using the pdsc's adminq. [1] pdsc_adminq_isr and the resulting work from queue_work(), i.e. pdsc_work_thread()->pdsc_process_adminq() [2] pdsc_adminq_post() When the device goes through reset via PCIe reset and/or a fw_down/fw_up cycle due to bad PCIe state or bad device state the adminq is destroyed and recreated. A NULL pointer dereference can happen if [1] or [2] happens after the adminq is already destroyed. In order to fix this, add some further state checks and implement reference counting for adminq uses. Reference counting was used because multiple threads can attempt to access the adminq at the same time via [1] or [2]. Additionally, multiple clients (i.e. pds-vfio-pci) can be using [2] at the same time. The adminq_refcnt is initialized to 1 when the adminq has been allocated and is ready to use. Users/clients of the adminq (i.e. [1] and [2]) will increment the refcnt when they are using the adminq. When the driver goes into a fw_down cycle it will set the PDSC_S_FW_DEAD bit and then wait for the adminq_refcnt to hit 1. Setting the PDSC_S_FW_DEAD before waiting will prevent any further adminq_refcnt increments. Waiting for the adminq_refcnt to hit 1 allows for any current users of the adminq to finish before the driver frees the adminq. Once the adminq_refcnt hits 1 the driver clears the refcnt to signify that the adminq is deleted and cannot be used. On the fw_up cycle the driver will once again initialize the adminq_refcnt to 1 allowing the adminq to be used again. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-5-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 31 +++++++++++++++++----- drivers/net/ethernet/amd/pds_core/core.c | 21 +++++++++++++++ drivers/net/ethernet/amd/pds_core/core.h | 1 + 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 68be5ea251fc1..5edff33d56f36 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq) return nq_work; } +static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc) +{ + if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) || + pdsc->state & BIT_ULL(PDSC_S_FW_DEAD)) + return false; + + return refcount_inc_not_zero(&pdsc->adminq_refcnt); +} + void pdsc_process_adminq(struct pdsc_qcq *qcq) { union pds_core_adminq_comp *comp; @@ -75,9 +84,9 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) int aq_work = 0; int credits; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return; } @@ -124,6 +133,7 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx], credits, PDS_CORE_INTR_CRED_REARM); + refcount_dec(&pdsc->adminq_refcnt); } void pdsc_work_thread(struct work_struct *work) @@ -138,9 +148,9 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) struct pdsc *pdsc = data; struct pdsc_qcq *qcq; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return IRQ_HANDLED; } @@ -148,6 +158,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data) qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); + refcount_dec(&pdsc->adminq_refcnt); return IRQ_HANDLED; } @@ -231,6 +242,12 @@ int pdsc_adminq_post(struct pdsc *pdsc, int err = 0; int index; + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n", + __func__, cmd->opcode); + return -ENXIO; + } + wc.qcq = &pdsc->adminqcq; index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); if (index < 0) { @@ -286,6 +303,8 @@ int pdsc_adminq_post(struct pdsc *pdsc, queue_work(pdsc->wq, &pdsc->health_work); } + refcount_dec(&pdsc->adminq_refcnt); + return err; } EXPORT_SYMBOL_GPL(pdsc_adminq_post); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0356e56a6e99e..f44333bd1256e 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -450,6 +450,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) pdsc_debugfs_add_viftype(pdsc); } + refcount_set(&pdsc->adminq_refcnt, 1); clear_bit(PDSC_S_FW_DEAD, &pdsc->state); return 0; @@ -514,6 +515,24 @@ void pdsc_stop(struct pdsc *pdsc) PDS_CORE_INTR_MASK_SET); } +static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc) +{ + /* The driver initializes the adminq_refcnt to 1 when the adminq is + * allocated and ready for use. Other users/requesters will increment + * the refcnt while in use. If the refcnt is down to 1 then the adminq + * is not in use and the refcnt can be cleared and adminq freed. Before + * calling this function the driver will set PDSC_S_FW_DEAD, which + * prevent subsequent attempts to use the adminq and increment the + * refcnt to fail. This guarantees that this function will eventually + * exit. + */ + while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) { + dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n", + __func__); + cpu_relax(); + } +} + void pdsc_fw_down(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { @@ -529,6 +548,8 @@ void pdsc_fw_down(struct pdsc *pdsc) if (pdsc->pdev->is_virtfn) return; + pdsc_adminq_wait_and_dec_once_unused(pdsc); + /* Notify clients of fw_down */ if (pdsc->fw_reporter) devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e35d3e7006bfc..cbd5716f46e69 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -184,6 +184,7 @@ struct pdsc { struct mutex devcmd_lock; /* lock for dev_cmd operations */ struct mutex config_lock; /* lock for configuration operations */ spinlock_t adminq_lock; /* lock for adminq operations */ + refcount_t adminq_refcnt; struct pds_core_dev_info_regs __iomem *info_regs; struct pds_core_dev_cmd_regs __iomem *cmd_regs; struct pds_core_intr __iomem *intr_ctrl; -- GitLab From e96094c1d11cce4deb5da3c0500d49041ab845b8 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:34 -0800 Subject: [PATCH 0470/1405] pds_core: Clear BARs on reset During reset the BARs might be accessed when they are unmapped. This can cause unexpected issues, so fix it by clearing the cached BAR values so they are not accessed until they are re-mapped. Also, make sure any places that can access the BARs when they are NULL are prevented. Fixes: 49ce92fbee0b ("pds_core: add FW update feature to devlink") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Reviewed-by: Przemek Kitszel Link: https://lore.kernel.org/r/20240129234035.69802-6-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/adminq.c | 28 +++++++++++++++------ drivers/net/ethernet/amd/pds_core/core.c | 8 +++++- drivers/net/ethernet/amd/pds_core/dev.c | 9 ++++++- drivers/net/ethernet/amd/pds_core/devlink.c | 3 ++- drivers/net/ethernet/amd/pds_core/fw.c | 3 +++ drivers/net/ethernet/amd/pds_core/main.c | 5 ++++ 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5edff33d56f36..ea773cfa0af67 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -191,10 +191,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Check that the FW is running */ if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); - - dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", - __func__, fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); + + dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_info(pdsc->dev, "%s: post failed - BARs not setup\n", + __func__); + } ret = -ENXIO; goto err_out_unlock; @@ -266,10 +272,16 @@ int pdsc_adminq_post(struct pdsc *pdsc, break; if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); - - dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", - __func__, fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); + + dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n", + __func__); + } err = -ENXIO; break; } diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index f44333bd1256e..65c8a7072e354 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -600,7 +600,13 @@ void pdsc_fw_up(struct pdsc *pdsc) static void pdsc_check_pci_health(struct pdsc *pdsc) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + u8 fw_status; + + /* some sort of teardown already in progress */ + if (!pdsc->info_regs) + return; + + fw_status = ioread8(&pdsc->info_regs->fw_status); /* is PCI broken? */ if (fw_status != PDS_RC_BAD_PCI) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 31940b857e0e5..62a38e0a84546 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code) bool pdsc_is_fw_running(struct pdsc *pdsc) { + if (!pdsc->info_regs) + return false; + pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status); pdsc->last_fw_time = jiffies; pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat); @@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, { int err; + if (!pdsc->cmd_regs) + return -ENXIO; + memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); - memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); + else + memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); return err; } diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index e9948ea5bbcdb..54864f27c87a9 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, mutex_lock(&pdsc->devcmd_lock); err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2); - memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); + if (!err) + memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); if (err && err != -EIO) return err; diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c index 90a811f3878ae..fa626719e68d1 100644 --- a/drivers/net/ethernet/amd/pds_core/fw.c +++ b/drivers/net/ethernet/amd/pds_core/fw.c @@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, dev_info(pdsc->dev, "Installing firmware\n"); + if (!pdsc->cmd_regs) + return -ENXIO; + dl = priv_to_devlink(pdsc); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 5172a5ad8ec6d..05fdeb235e5f1 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -37,6 +37,11 @@ static void pdsc_unmap_bars(struct pdsc *pdsc) struct pdsc_dev_bar *bars = pdsc->bars; unsigned int i; + pdsc->info_regs = NULL; + pdsc->cmd_regs = NULL; + pdsc->intr_status = NULL; + pdsc->intr_ctrl = NULL; + for (i = 0; i < PDS_CORE_BARS_MAX; i++) { if (bars[i].vaddr) pci_iounmap(pdsc->pdev, bars[i].vaddr); -- GitLab From bc90fbe0c3182157d2be100a2f6c2edbb1820677 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 29 Jan 2024 15:40:35 -0800 Subject: [PATCH 0471/1405] pds_core: Rework teardown/setup flow to be more common Currently the teardown/setup flow for driver probe/remove is quite a bit different from the reset flows in pdsc_fw_down()/pdsc_fw_up(). One key piece that's missing are the calls to pci_alloc_irq_vectors() and pci_free_irq_vectors(). The pcie reset case is calling pci_free_irq_vectors() on reset_prepare, but not calling the corresponding pci_alloc_irq_vectors() on reset_done. This is causing unexpected/unwanted interrupt behavior due to the adminq interrupt being accidentally put into legacy interrupt mode. Also, the pci_alloc_irq_vectors()/pci_free_irq_vectors() functions are being called directly in probe/remove respectively. Fix this inconsistency by making the following changes: 1. Always call pdsc_dev_init() in pdsc_setup(), which calls pci_alloc_irq_vectors() and get rid of the now unused pds_dev_reinit(). 2. Always free/clear the pdsc->intr_info in pdsc_teardown() since this structure will get re-alloced in pdsc_setup(). 3. Move the calls of pci_free_irq_vectors() to pdsc_teardown() since pci_alloc_irq_vectors() will always be called in pdsc_setup()->pdsc_dev_init() for both the probe/remove and reset flows. 4. Make sure to only create the debugfs "identity" entry when it doesn't already exist, which it will in the reset case because it's already been created in the initial call to pdsc_dev_init(). Fixes: ffa55858330f ("pds_core: implement pci reset handlers") Signed-off-by: Brett Creeley Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240129234035.69802-7-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 13 +++++-------- drivers/net/ethernet/amd/pds_core/core.h | 1 - drivers/net/ethernet/amd/pds_core/debugfs.c | 4 ++++ drivers/net/ethernet/amd/pds_core/dev.c | 7 ------- drivers/net/ethernet/amd/pds_core/main.c | 2 -- 5 files changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 65c8a7072e354..7658a72867675 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -404,10 +404,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) int numdescs; int err; - if (init) - err = pdsc_dev_init(pdsc); - else - err = pdsc_dev_reinit(pdsc); + err = pdsc_dev_init(pdsc); if (err) return err; @@ -479,10 +476,9 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) for (i = 0; i < pdsc->nintrs; i++) pdsc_intr_free(pdsc, i); - if (removing) { - kfree(pdsc->intr_info); - pdsc->intr_info = NULL; - } + kfree(pdsc->intr_info); + pdsc->intr_info = NULL; + pdsc->nintrs = 0; } if (pdsc->kern_dbpage) { @@ -490,6 +486,7 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) pdsc->kern_dbpage = NULL; } + pci_free_irq_vectors(pdsc->pdev); set_bit(PDSC_S_FW_DEAD, &pdsc->state); } diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index cbd5716f46e69..110c4b826b22d 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -281,7 +281,6 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, union pds_core_dev_comp *comp, int max_seconds); int pdsc_devcmd_init(struct pdsc *pdsc); int pdsc_devcmd_reset(struct pdsc *pdsc); -int pdsc_dev_reinit(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); void pdsc_reset_prepare(struct pci_dev *pdev); diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 8ec392299b7dc..4e8579ca1c8c7 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + /* This file will already exist in the reset flow */ + if (debugfs_lookup("identity", pdsc->dentry)) + return; + debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); } diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 62a38e0a84546..e65a1632df505 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -316,13 +316,6 @@ static int pdsc_identify(struct pdsc *pdsc) return 0; } -int pdsc_dev_reinit(struct pdsc *pdsc) -{ - pdsc_init_devinfo(pdsc); - - return pdsc_identify(pdsc); -} - int pdsc_dev_init(struct pdsc *pdsc) { unsigned int nintrs; diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 05fdeb235e5f1..cdbf053b5376c 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -438,7 +438,6 @@ static void pdsc_remove(struct pci_dev *pdev) mutex_destroy(&pdsc->config_lock); mutex_destroy(&pdsc->devcmd_lock); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); } @@ -470,7 +469,6 @@ void pdsc_reset_prepare(struct pci_dev *pdev) pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); -- GitLab From f7c25d8e17dd759d97ca093faf92eeb7da7b3890 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:16 +0100 Subject: [PATCH 0472/1405] selftests: net: add missing config for pmtu.sh tests The mentioned test uses a few Kconfig still missing the net config, add them. Before: # Error: Specified qdisc kind is unknown. # Error: Specified qdisc kind is unknown. # Error: Qdisc not classful. # We have an error talking to the kernel # Error: Qdisc not classful. # We have an error talking to the kernel # policy_routing not supported # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [SKIP] After: # TEST: ICMPv4 with DSCP and ECN: PMTU exceptions [ OK ] Fixes: ec730c3e1f0e ("selftest: net: Test IPv4 PMTU exceptions with DSCP and ECN") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/8d27bf6762a5c7b3acc457d6e6872c533040f9c1.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 77a173635a29b..98c6bd2228c69 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -49,8 +49,10 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_MATCHALL=m @@ -62,6 +64,7 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m -- GitLab From e4e4b6d568d2549583cbda3f8ce567e586cb05da Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:17 +0100 Subject: [PATCH 0473/1405] selftests: net: fix available tunnels detection The pmtu.sh test tries to detect the tunnel protocols available in the running kernel and properly skip the unsupported cases. In a few more complex setup, such detection is unsuccessful, as the script currently ignores some intermediate error code at setup time. Before: # which: no nettest in (/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin) # TEST: vti6: PMTU exceptions (ESP-in-UDP) [FAIL] # PMTU exception wasn't created after creating tunnel exceeding link layer MTU # ./pmtu.sh: line 931: kill: (7543) - No such process # ./pmtu.sh: line 931: kill: (7544) - No such process After: # xfrm4 not supported # TEST: vti4: PMTU exceptions [SKIP] Fixes: ece1278a9b81 ("selftests: net: add ESP-in-UDP PMTU test") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/cab10e75fda618e6fff8c595b632f47db58b9309.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f10879788f61b..31892b366913c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -707,23 +707,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { -- GitLab From bc0970d5ac1d1317e212bdf55533935ecb6ae95c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 30 Jan 2024 18:47:18 +0100 Subject: [PATCH 0474/1405] selftests: net: don't access /dev/stdout in pmtu.sh When running the pmtu.sh via the kselftest infra, accessing /dev/stdout gives unexpected results: # dd: failed to open '/dev/stdout': Device or resource busy # TEST: IPv4, bridged vxlan4: PMTU exceptions [FAIL] Let dd use directly the standard output to fix the above: # TEST: IPv4, bridged vxlan4: PMTU exceptions - nexthop objects [ OK ] Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") Signed-off-by: Paolo Abeni Reviewed-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/23d7592c5d77d75cff9b34f15c227f92e911c2ae.1706635101.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 31892b366913c..3f118e3f1c66d 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1339,7 +1339,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { sleep 1 - dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} -- GitLab From a107d643b2a3382e0a2d2c4ef08bf8c6bff4561d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:00 +0100 Subject: [PATCH 0475/1405] media: Revert "media: rkisp1: Drop IRQF_SHARED" This reverts commit 85d2a31fe4d9be1555f621ead7a520d8791e0f74. The rkisp1 does share interrupt lines on some platforms, after all. Thus we need to revert this, and implement a fix for the rkisp1 shared irq handling in a follow-up patch. Closes: https://lore.kernel.org/all/87o7eo8vym.fsf@gmail.com/ Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-1-173007628248@ideasonboard.com Reported-by: Mikhail Rudenko Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index f96f821a7b50d..acc559652d6eb 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -559,7 +559,7 @@ static int rkisp1_probe(struct platform_device *pdev) rkisp1->irqs[il] = irq; } - ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0, + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { dev_err(dev, "request irq failed: %d\n", ret); -- GitLab From ffb635bb398fc07cb38f8a7b4a82cbe5f412f08e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 18 Dec 2023 08:54:01 +0100 Subject: [PATCH 0476/1405] media: rkisp1: Fix IRQ handling due to shared interrupts The driver requests the interrupts as IRQF_SHARED, so the interrupt handlers can be called at any time. If such a call happens while the ISP is powered down, the SoC will hang as the driver tries to access the ISP registers. This can be reproduced even without the platform sharing the IRQ line: Enable CONFIG_DEBUG_SHIRQ and unload the driver, and the board will hang. Fix this by adding a new field, 'irqs_enabled', which is used to bail out from the interrupt handler when the ISP is not operational. Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-2-173007628248@ideasonboard.com Signed-off-by: Tomi Valkeinen Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- .../platform/rockchip/rkisp1/rkisp1-capture.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-common.h | 2 ++ .../platform/rockchip/rkisp1/rkisp1-csi.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-dev.c | 22 +++++++++++++++++++ .../platform/rockchip/rkisp1/rkisp1-isp.c | 3 +++ 5 files changed, 33 insertions(+) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c index aebd3c12020bf..c381c22135a21 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c @@ -725,6 +725,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx) unsigned int i; u32 status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index 4b6b28c05b891..b757f75edecf7 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -450,6 +450,7 @@ struct rkisp1_debug { * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information * @irqs: IRQ line numbers + * @irqs_enabled: the hardware is enabled and can cause interrupts */ struct rkisp1_device { void __iomem *base_addr; @@ -471,6 +472,7 @@ struct rkisp1_device { struct rkisp1_debug debug; const struct rkisp1_info *info; int irqs[RKISP1_NUM_IRQS]; + bool irqs_enabled; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index b6e47e2f1b949..4202642e05239 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -196,6 +196,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 val, status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index acc559652d6eb..73cf08a740118 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev) { struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); + rkisp1->irqs_enabled = false; + /* Make sure the IRQ handler will see the above */ + mb(); + + /* + * Wait until any running IRQ handler has returned. The IRQ handler + * may get called even after this (as it's a shared interrupt line) + * but the 'irqs_enabled' flag will make the handler return immediately. + */ + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (rkisp1->irqs[il] == -1) + continue; + + /* Skip if the irq line is the same as previous */ + if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il]) + synchronize_irq(rkisp1->irqs[il]); + } + clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); return pinctrl_pm_select_sleep_state(dev); } @@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev) if (ret) return ret; + rkisp1->irqs_enabled = true; + /* Make sure the IRQ handler will see the above */ + mb(); + return 0; } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index f00873d31c42b..78a1f7a1499be 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -976,6 +976,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 status, isp_err; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS); if (!status) return IRQ_NONE; -- GitLab From aa125f229076a8230a171d519dbdb2a862145d33 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 28 Jan 2024 10:23:09 +0200 Subject: [PATCH 0477/1405] wifi: iwlwifi: remove extra kernel-doc This no longer exists, remove the kernel-doc. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Signed-off-by: Kalle Valo Link: https://msgid.link/20240128102209.d2192d79bc09.Id9551728d618248dd471382a5283503a8976237a@changeid --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index e27774e7ed74d..80fda056e46a6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2005-2014, 2018-2023 Intel Corporation + * Copyright (C) 2005-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH */ @@ -19,7 +19,6 @@ * @fwrt_ptr: pointer to the buffer coming from fwrt * @trans_ptr: pointer to struct %iwl_trans_dump_data which contains the * transport's data. - * @trans_len: length of the valid data in trans_ptr * @fwrt_len: length of the valid data in fwrt_ptr */ struct iwl_fw_dump_ptrs { -- GitLab From f9c15a678db3acbe769635e3c49f979e2f88a514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 24 Jan 2024 09:18:30 -0800 Subject: [PATCH 0478/1405] drm/xe: Fix crash in trace_dma_fence_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit trace_dma_fence_init() uses dma_fence_ops functions like get_driver_name() and get_timeline_name() to generate trace information but the Xe KMD implementation of those functions makes use of xe_hw_fence_ctx that was being set after dma_fence_init(). So here just inverting the order to fix the crash. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: José Roberto de Souza Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240124171830.95774-1-jose.souza@intel.com (cherry picked from commit c6878e47431c72168da08dfbc1496c09b2d3c246) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_hw_fence.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index a6094c81f2ad0..a5de3e7b0bd6a 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -217,13 +217,13 @@ struct xe_hw_fence *xe_hw_fence_create(struct xe_hw_fence_ctx *ctx, if (!fence) return ERR_PTR(-ENOMEM); - dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, - ctx->dma_fence_ctx, ctx->next_seqno++); - fence->ctx = ctx; fence->seqno_map = seqno_map; INIT_LIST_HEAD(&fence->irq_link); + dma_fence_init(&fence->dma, &xe_hw_fence_ops, &ctx->irq->lock, + ctx->dma_fence_ctx, ctx->next_seqno++); + trace_xe_hw_fence_create(fence); return fence; -- GitLab From 6d2096239af11f1c9fa03e8fc74400ce048078b0 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 26 Jan 2024 14:06:14 -0800 Subject: [PATCH 0479/1405] drm/xe: Grab mem_access when disabling C6 on skip_guc_pc platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If skip_guc_pc is set for a platform, C6 is disabled directly without acquiring a mem_access reference, triggering an assertion inside xe_gt_idle_disable_c6. Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set") Cc: Rodrigo Vivi Cc: Vinay Belgaumkar Signed-off-by: Matt Roper Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240126220613.865939-2-matthew.d.roper@intel.com (cherry picked from commit 9f5971bdf78e0937206556534247243ad56cd735) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index f71085228cb33..d91702592520a 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -963,7 +963,9 @@ void xe_guc_pc_fini(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); if (xe->info.skip_guc_pc) { + xe_device_mem_access_get(xe); xe_gt_idle_disable_c6(pc_to_gt(pc)); + xe_device_mem_access_put(xe); return; } -- GitLab From efeff7b38ef62fc65069bd2200d151a9d5d38907 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jan 2024 15:44:13 -0800 Subject: [PATCH 0480/1405] drm/xe: Only allow 1 ufence per exec / bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way exec ufences are coded only 1 ufence per IOCTL will be signaled. It is possible to fix this but for current use cases 1 ufence per IOCTL is sufficient. Enforce a limit of 1 ufence per IOCTL (both exec and bind to be uniform). v2: - Add fixes tag (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Mika Kahola Cc: Thomas Hellström Signed-off-by: Matthew Brost Reviewed-by: Brian Welty Link: https://patchwork.freedesktop.org/patch/msgid/20240124234413.1640825-1-matthew.brost@intel.com (cherry picked from commit d1df9bfbf68c65418f30917f406b6d5bd597714e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec.c | 10 +++++++++- drivers/gpu/drm/xe/xe_sync.h | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 10 +++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index b853feed9ccc1..17f26952e6656 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -111,7 +111,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0; + u32 i, num_syncs = 0, num_ufence = 0; struct xe_sched_job *job; struct dma_fence *rebind_fence; struct xe_vm *vm; @@ -157,6 +157,14 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; + + if (xe_sync_is_ufence(&syncs[i])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto err_syncs; } if (xe_exec_queue_is_parallel(q)) { diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index d284afbe917c1..f43cdcaca6c57 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -33,4 +33,9 @@ struct dma_fence * xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, struct xe_exec_queue *q, struct xe_vm *vm); +static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) +{ + return !!sync->ufence; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 53833ab81424c..32ae51945439a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2851,7 +2851,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_gpuva_ops **ops = NULL; struct xe_vm *vm; struct xe_exec_queue *q = NULL; - u32 num_syncs; + u32 num_syncs, num_ufence = 0; struct xe_sync_entry *syncs = NULL; struct drm_xe_vm_bind_op *bind_ops; LIST_HEAD(ops_list); @@ -2988,6 +2988,14 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); if (err) goto free_syncs; + + if (xe_sync_is_ufence(&syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(xe, num_ufence > 1)) { + err = -EINVAL; + goto free_syncs; } if (!args->num_binds) { -- GitLab From 3ecf036b04b9dc72ca5bd62359748e14568fcf3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:46 +0100 Subject: [PATCH 0481/1405] drm/xe: Annotate mcr_[un]lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions acquire and release the gt::mcr_lock. Annotate accordingly. Fix the corresponding sparse warning. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Fixes: fb1d55efdfcb ("drm/xe: Cleanup OPEN_BRACE style issues") Cc: Francois Dugast Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-4-thomas.hellstrom@linux.intel.com (cherry picked from commit 97fd7a7e4e877676a2ab1a687ba958b70931abcc) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_mcr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 77925b35cf8dc..8546cd3cc50d1 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -480,7 +480,7 @@ static bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, * to synchronize with external clients (e.g., firmware), so a semaphore * register will also need to be taken. */ -static void mcr_lock(struct xe_gt *gt) +static void mcr_lock(struct xe_gt *gt) __acquires(>->mcr_lock) { struct xe_device *xe = gt_to_xe(gt); int ret = 0; @@ -500,7 +500,7 @@ static void mcr_lock(struct xe_gt *gt) drm_WARN_ON_ONCE(&xe->drm, ret == -ETIMEDOUT); } -static void mcr_unlock(struct xe_gt *gt) +static void mcr_unlock(struct xe_gt *gt) __releases(>->mcr_lock) { /* Release hardware semaphore - this is done by writing 1 to the register */ if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) -- GitLab From ef87557928d1ab3a1487520962f55cd7163e621b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 17 Jan 2024 14:40:47 +0100 Subject: [PATCH 0482/1405] drm/xe: Don't use __user error pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error pointer macros are not aware of __user pointers and as a consequence sparse warns. Have the copy_mask() function return an integer instead of a __user pointer. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240117134048.165425-5-thomas.hellstrom@linux.intel.com (cherry picked from commit 78366eed6853aa6a5deccb2eb182f9334d2bd208) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_query.c | 50 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 9b35673b286c8..7e924faeeea0b 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -459,21 +459,21 @@ static size_t calc_topo_query_size(struct xe_device *xe) sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); } -static void __user *copy_mask(void __user *ptr, - struct drm_xe_query_topology_mask *topo, - void *mask, size_t mask_size) +static int copy_mask(void __user **ptr, + struct drm_xe_query_topology_mask *topo, + void *mask, size_t mask_size) { topo->num_bytes = mask_size; - if (copy_to_user(ptr, topo, sizeof(*topo))) - return ERR_PTR(-EFAULT); - ptr += sizeof(topo); + if (copy_to_user(*ptr, topo, sizeof(*topo))) + return -EFAULT; + *ptr += sizeof(topo); - if (copy_to_user(ptr, mask, mask_size)) - return ERR_PTR(-EFAULT); - ptr += mask_size; + if (copy_to_user(*ptr, mask, mask_size)) + return -EFAULT; + *ptr += mask_size; - return ptr; + return 0; } static int query_gt_topology(struct xe_device *xe, @@ -493,28 +493,28 @@ static int query_gt_topology(struct xe_device *xe, } for_each_gt(gt, xe, id) { + int err; + topo.gt_id = id; topo.type = DRM_XE_TOPO_DSS_GEOMETRY; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.g_dss_mask, - sizeof(gt->fuse_topo.g_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask, + sizeof(gt->fuse_topo.g_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_DSS_COMPUTE; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.c_dss_mask, - sizeof(gt->fuse_topo.c_dss_mask)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask, + sizeof(gt->fuse_topo.c_dss_mask)); + if (err) + return err; topo.type = DRM_XE_TOPO_EU_PER_DSS; - query_ptr = copy_mask(query_ptr, &topo, - gt->fuse_topo.eu_mask_per_dss, - sizeof(gt->fuse_topo.eu_mask_per_dss)); - if (IS_ERR(query_ptr)) - return PTR_ERR(query_ptr); + err = copy_mask(&query_ptr, &topo, + gt->fuse_topo.eu_mask_per_dss, + sizeof(gt->fuse_topo.eu_mask_per_dss)); + if (err) + return err; } return 0; -- GitLab From 89642db3b28849c23f42baadc88b40435ba6c5c6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 23 Jan 2024 13:26:38 -0800 Subject: [PATCH 0483/1405] drm/xe: Use LRC prefix rather than CTX prefix in lrc desc defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sparc build fails [1] due to CTX_VALID being redefined. Fix this by using a better naming convention of LRC_VALID as this define is used in setting bits in the lrc descriptor. To be uniform, change other define with LRC prefix too. [1] https://lore.kernel.org/all/20240123111235.3097079-1-geert@linux-m68k.org/ v2: - s/LEGACY_64B_CONTEXT/LRC_LEGACY_64B_CONTEXT (Lucas) Fixes: 0bc519d20ffa ("drm/xe: Remove GEN[0-9]*_ prefixes") Cc: Thomas Hellström Cc: Lucas De Marchi Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240123212638.1605626-1-matthew.brost@intel.com (cherry picked from commit 152ca51d8db03f08a71c25e999812e263839fdce) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_lrc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index b7fa3831b6845..0ec5ad2539f1b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -21,10 +21,10 @@ #include "xe_map.h" #include "xe_vm.h" -#define CTX_VALID (1 << 0) -#define CTX_PRIVILEGE (1 << 8) -#define CTX_ADDRESSING_MODE_SHIFT 3 -#define LEGACY_64B_CONTEXT 3 +#define LRC_VALID (1 << 0) +#define LRC_PRIVILEGE (1 << 8) +#define LRC_ADDRESSING_MODE_SHIFT 3 +#define LRC_LEGACY_64B_CONTEXT 3 #define ENGINE_CLASS_SHIFT 61 #define ENGINE_INSTANCE_SHIFT 48 @@ -762,15 +762,15 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, (q->usm.acc_notify << ACC_NOTIFY_S) | q->usm.acc_trigger); - lrc->desc = CTX_VALID; - lrc->desc |= LEGACY_64B_CONTEXT << CTX_ADDRESSING_MODE_SHIFT; + lrc->desc = LRC_VALID; + lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; /* TODO: Priority */ /* While this appears to have something about privileged batches or * some such, it really just means PPGTT mode. */ if (vm) - lrc->desc |= CTX_PRIVILEGE; + lrc->desc |= LRC_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; -- GitLab From ed2bdf3b264d627e1c2f26272660e1d7c2115000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 31 Jan 2024 10:16:28 +0100 Subject: [PATCH 0484/1405] drm/xe/vm: Subclass userptr vmas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The construct allocating only parts of the vma structure when the userptr part is not needed is very fragile. A developer could add additional fields below the userptr part, and the code could easily attempt to access the userptr part even if its not persent. So introduce xe_userptr_vma which subclasses struct xe_vma the proper way, and accordingly modify a couple of interfaces. This should also help if adding userptr helpers to drm_gpuvm. v2: - Fix documentation of to_userptr_vma() (Matthew Brost) - Fix allocation and freeing of vmas to clearer distinguish between the types. Closes: https://lore.kernel.org/intel-xe/0c4cc1a7-f409-4597-b110-81f9e45d1ffe@embeddedor.com/T/#u Fixes: a4cc60a55fd9 ("drm/xe: Only alloc userptr part of xe_vma for userptrs") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Lucas De Marchi Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240131091628.12318-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 5bd24e78829ad569fa1c3ce9a05b59bb97b91f3d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 +- drivers/gpu/drm/xe/xe_pt.c | 32 +++--- drivers/gpu/drm/xe/xe_vm.c | 155 ++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 16 ++- drivers/gpu/drm/xe/xe_vm_types.h | 16 +-- 5 files changed, 137 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 59a70d2e0a7a3..9c2fe1697d6ee 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -165,7 +165,8 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) goto unlock_vm; } - if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) { + if (!xe_vma_is_userptr(vma) || + !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { downgrade_write(&vm->lock); write_locked = false; } @@ -181,11 +182,13 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) /* TODO: Validate fault */ if (xe_vma_is_userptr(vma) && write_locked) { + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&uvma->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); - ret = xe_vma_userptr_pin_pages(vma); + ret = xe_vma_userptr_pin_pages(uvma); if (ret) goto unlock_vm; @@ -220,7 +223,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) dma_fence_put(fence); if (xe_vma_is_userptr(vma)) - ret = xe_vma_userptr_check_repin(vma); + ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); vma->usm.tile_invalidated &= ~BIT(tile->id); unlock_dma_resv: diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index de1030a475883..e45b37c3f0c26 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -618,8 +618,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, if (!xe_vma_is_null(vma)) { if (xe_vma_is_userptr(vma)) - xe_res_first_sg(vma->userptr.sg, 0, xe_vma_size(vma), - &curs); + xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, + xe_vma_size(vma), &curs); else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), xe_vma_size(vma), &curs); @@ -906,17 +906,17 @@ static void xe_vm_dbg_print_entries(struct xe_device *xe, #ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT -static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { - u32 divisor = vma->userptr.divisor ? vma->userptr.divisor : 2; + u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; static u32 count; if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_vm *vm = xe_vma_vm(&uvma->vma); - vma->userptr.divisor = divisor << 1; + uvma->userptr.divisor = divisor << 1; spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&uvma->userptr.invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); return true; @@ -927,7 +927,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma) #else -static bool xe_pt_userptr_inject_eagain(struct xe_vma *vma) +static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) { return false; } @@ -1000,9 +1000,9 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) { struct xe_pt_migrate_pt_update *userptr_update = container_of(pt_update, typeof(*userptr_update), base); - struct xe_vma *vma = pt_update->vma; - unsigned long notifier_seq = vma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(vma); + struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); + unsigned long notifier_seq = uvma->userptr.notifier_seq; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); int err = xe_pt_vm_dependencies(pt_update->job, &vm->rftree[pt_update->tile_id], pt_update->start, @@ -1023,7 +1023,7 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) */ do { down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&vma->userptr.notifier, + if (!mmu_interval_read_retry(&uvma->userptr.notifier, notifier_seq)) break; @@ -1032,11 +1032,11 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) if (userptr_update->bind) return -EAGAIN; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); + notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); } while (true); /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(vma)) { + if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { up_read(&vm->userptr.notifier_lock); return -EAGAIN; } @@ -1297,7 +1297,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue vma->tile_present |= BIT(tile->id); if (bind_pt_update.locked) { - vma->userptr.initial_bind = true; + to_userptr_vma(vma)->userptr.initial_bind = true; up_read(&vm->userptr.notifier_lock); xe_bo_put_commit(&deferred); } @@ -1642,7 +1642,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu if (!vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&vma->userptr.invalidate_link); + list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } up_read(&vm->userptr.notifier_lock); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 32ae51945439a..30db264d34a32 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -46,7 +46,7 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) /** * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @vma: The userptr vma + * @uvma: The userptr vma * * Check if the userptr vma has been invalidated since last successful * repin. The check is advisory only and can the function can be called @@ -56,15 +56,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) * * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. */ -int xe_vma_userptr_check_repin(struct xe_vma *vma) +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { - return mmu_interval_check_retry(&vma->userptr.notifier, - vma->userptr.notifier_seq) ? + return mmu_interval_check_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq) ? -EAGAIN : 0; } -int xe_vma_userptr_pin_pages(struct xe_vma *vma) +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; @@ -80,30 +82,30 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) if (vma->gpuva.flags & XE_VMA_DESTROYED) return 0; - notifier_seq = mmu_interval_read_begin(&vma->userptr.notifier); - if (notifier_seq == vma->userptr.notifier_seq) + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) return 0; pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; - if (vma->userptr.sg) { + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } pinned = ret = 0; if (in_kthread) { - if (!mmget_not_zero(vma->userptr.notifier.mm)) { + if (!mmget_not_zero(userptr->notifier.mm)) { ret = -EFAULT; goto mm_closed; } - kthread_use_mm(vma->userptr.notifier.mm); + kthread_use_mm(userptr->notifier.mm); } while (pinned < num_pages) { @@ -123,32 +125,32 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) } if (in_kthread) { - kthread_unuse_mm(vma->userptr.notifier.mm); - mmput(vma->userptr.notifier.mm); + kthread_unuse_mm(userptr->notifier.mm); + mmput(userptr->notifier.mm); } mm_closed: if (ret) goto out; - ret = sg_alloc_table_from_pages_segment(&vma->userptr.sgt, pages, + ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, pinned, 0, (u64)pinned << PAGE_SHIFT, xe_sg_segment_size(xe->drm.dev), GFP_KERNEL); if (ret) { - vma->userptr.sg = NULL; + userptr->sg = NULL; goto out; } - vma->userptr.sg = &vma->userptr.sgt; + userptr->sg = &userptr->sgt; - ret = dma_map_sgtable(xe->drm.dev, vma->userptr.sg, + ret = dma_map_sgtable(xe->drm.dev, userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); if (ret) { - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; goto out; } @@ -167,8 +169,8 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma) kvfree(pages); if (!(ret < 0)) { - vma->userptr.notifier_seq = notifier_seq; - if (xe_vma_userptr_check_repin(vma) == -EAGAIN) + userptr->notifier_seq = notifier_seq; + if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) goto retry; } @@ -635,7 +637,9 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq) { - struct xe_vma *vma = container_of(mni, struct xe_vma, userptr.notifier); + struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); + struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; @@ -651,7 +655,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); /* No need to stop gpu access if the userptr is not yet bound. */ - if (!vma->userptr.initial_bind) { + if (!userptr->initial_bind) { up_write(&vm->userptr.notifier_lock); return true; } @@ -663,7 +667,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, if (!xe_vm_in_fault_mode(vm) && !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&vma->userptr.invalidate_link, + list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } @@ -703,7 +707,7 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { int xe_vm_userptr_pin(struct xe_vm *vm) { - struct xe_vma *vma, *next; + struct xe_userptr_vma *uvma, *next; int err = 0; LIST_HEAD(tmp_evict); @@ -711,22 +715,23 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(vma, next, &vm->userptr.invalidated, + list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { - list_del_init(&vma->userptr.invalidate_link); - list_move_tail(&vma->combined_links.userptr, + list_del_init(&uvma->userptr.invalidate_link); + list_move_tail(&uvma->userptr.repin_link, &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); /* Pin and move to temporary list */ - list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, - combined_links.userptr) { - err = xe_vma_userptr_pin_pages(vma); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + err = xe_vma_userptr_pin_pages(uvma); if (err < 0) return err; - list_move_tail(&vma->combined_links.userptr, &vm->rebind_list); + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->vma.combined_links.rebind, &vm->rebind_list); } return 0; @@ -782,6 +787,14 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) return fence; } +static void xe_vma_free(struct xe_vma *vma) +{ + if (xe_vma_is_userptr(vma)) + kfree(to_userptr_vma(vma)); + else + kfree(vma); +} + #define VMA_CREATE_FLAG_READ_ONLY BIT(0) #define VMA_CREATE_FLAG_IS_NULL BIT(1) @@ -800,14 +813,26 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); - if (!bo && !is_null) /* userptr */ + /* + * Allocate and ensure that the xe_vma_is_userptr() return + * matches what was allocated. + */ + if (!bo && !is_null) { + struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); + + if (!uvma) + return ERR_PTR(-ENOMEM); + + vma = &uvma->vma; + } else { vma = kzalloc(sizeof(*vma), GFP_KERNEL); - else - vma = kzalloc(sizeof(*vma) - sizeof(struct xe_userptr), - GFP_KERNEL); - if (!vma) { - vma = ERR_PTR(-ENOMEM); - return vma; + if (!vma) + return ERR_PTR(-ENOMEM); + + if (is_null) + vma->gpuva.flags |= DRM_GPUVA_SPARSE; + if (bo) + vma->gpuva.gem.obj = &bo->ttm.base; } INIT_LIST_HEAD(&vma->combined_links.rebind); @@ -818,8 +843,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.va.range = end - start + 1; if (read_only) vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -836,35 +859,35 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); if (IS_ERR(vm_bo)) { - kfree(vma); + xe_vma_free(vma); return ERR_CAST(vm_bo); } drm_gpuvm_bo_extobj_add(vm_bo); drm_gem_object_get(&bo->ttm.base); - vma->gpuva.gem.obj = &bo->ttm.base; vma->gpuva.gem.offset = bo_offset_or_userptr; drm_gpuva_link(&vma->gpuva, vm_bo); drm_gpuvm_bo_put(vm_bo); } else /* userptr or null */ { if (!is_null) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; u64 size = end - start + 1; int err; - INIT_LIST_HEAD(&vma->userptr.invalidate_link); + INIT_LIST_HEAD(&userptr->invalidate_link); + INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; - err = mmu_interval_notifier_insert(&vma->userptr.notifier, + err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, xe_vma_userptr(vma), size, &vma_userptr_notifier_ops); if (err) { - kfree(vma); - vma = ERR_PTR(err); - return vma; + xe_vma_free(vma); + return ERR_PTR(err); } - vma->userptr.notifier_seq = LONG_MAX; + userptr->notifier_seq = LONG_MAX; } xe_vm_get(vm); @@ -880,13 +903,15 @@ static void xe_vma_destroy_late(struct xe_vma *vma) bool read_only = xe_vma_read_only(vma); if (xe_vma_is_userptr(vma)) { - if (vma->userptr.sg) { + struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + + if (userptr->sg) { dma_unmap_sgtable(xe->drm.dev, - vma->userptr.sg, + userptr->sg, read_only ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL, 0); - sg_free_table(vma->userptr.sg); - vma->userptr.sg = NULL; + sg_free_table(userptr->sg); + userptr->sg = NULL; } /* @@ -894,7 +919,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * the notifer until we're sure the GPU is not accessing * them anymore */ - mmu_interval_notifier_remove(&vma->userptr.notifier); + mmu_interval_notifier_remove(&userptr->notifier); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); @@ -902,7 +927,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) xe_bo_put(xe_vma_bo(vma)); } - kfree(vma); + xe_vma_free(vma); } static void vma_destroy_work_func(struct work_struct *w) @@ -933,7 +958,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); - list_del(&vma->userptr.invalidate_link); + list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { xe_bo_assert_held(xe_vma_bo(vma)); @@ -2150,7 +2175,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, drm_exec_fini(&exec); if (xe_vma_is_userptr(vma)) { - err = xe_vma_userptr_pin_pages(vma); + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); if (err) { prep_vma_destroy(vm, vma, false); xe_vma_destroy_unlocked(vma); @@ -2507,7 +2532,7 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, if (err == -EAGAIN && xe_vma_is_userptr(vma)) { lockdep_assert_held_write(&vm->lock); - err = xe_vma_userptr_pin_pages(vma); + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); if (!err) goto retry_userptr; @@ -3138,8 +3163,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { WARN_ON_ONCE(!mmu_interval_check_retry - (&vma->userptr.notifier, - vma->userptr.notifier_seq)); + (&to_userptr_vma(vma)->userptr.notifier, + to_userptr_vma(vma)->userptr.notifier_seq)); WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), DMA_RESV_USAGE_BOOKKEEP)); @@ -3200,11 +3225,11 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) if (is_null) { addr = 0; } else if (is_userptr) { + struct sg_table *sg = to_userptr_vma(vma)->userptr.sg; struct xe_res_cursor cur; - if (vma->userptr.sg) { - xe_res_first_sg(vma->userptr.sg, 0, XE_PAGE_SIZE, - &cur); + if (sg) { + xe_res_first_sg(sg, 0, XE_PAGE_SIZE, &cur); addr = xe_res_dma(&cur); } else { addr = 0; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index cf2f96e8c1ab9..9654a0612fc25 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -160,6 +160,18 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma) return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma); } +/** + * to_userptr_vma() - Return a pointer to an embedding userptr vma + * @vma: Pointer to the embedded struct xe_vma + * + * Return: Pointer to the embedding userptr vma + */ +static inline struct xe_userptr_vma *to_userptr_vma(struct xe_vma *vma) +{ + xe_assert(xe_vma_vm(vma)->xe, xe_vma_is_userptr(vma)); + return container_of(vma, struct xe_userptr_vma, vma); +} + u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile); int xe_vm_create_ioctl(struct drm_device *dev, void *data, @@ -224,9 +236,9 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm) } } -int xe_vma_userptr_pin_pages(struct xe_vma *vma); +int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma); -int xe_vma_userptr_check_repin(struct xe_vma *vma); +int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma); bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 63e8a50b88e94..1fec66ae2eb2d 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -37,6 +37,8 @@ struct xe_vm; struct xe_userptr { /** @invalidate_link: Link for the vm::userptr.invalidated list */ struct list_head invalidate_link; + /** @userptr: link into VM repin list if userptr. */ + struct list_head repin_link; /** * @notifier: MMU notifier for user pointer (invalidation call back) */ @@ -68,8 +70,6 @@ struct xe_vma { * resv. */ union { - /** @userptr: link into VM repin list if userptr. */ - struct list_head userptr; /** @rebind: link into VM if this VMA needs rebinding. */ struct list_head rebind; /** @destroy: link to contested list when VM is being closed. */ @@ -105,11 +105,15 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; +}; - /** - * @userptr: user pointer state, only allocated for VMAs that are - * user pointers - */ +/** + * struct xe_userptr_vma - A userptr vma subclass + * @vma: The vma. + * @userptr: Additional userptr information. + */ +struct xe_userptr_vma { + struct xe_vma vma; struct xe_userptr userptr; }; -- GitLab From c9cfed29f5fe13f97e46c3879517d8c41ae251d6 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 30 Jan 2024 18:54:24 -0800 Subject: [PATCH 0485/1405] drm/xe: Make all GuC ABI shift values unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All GuC ABI definitions are unsigned and not defining as unsigned is causing build errors [1]. [1] https://lore.kernel.org/all/20240123111235.3097079-1-geert@linux-m68k.org/ Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Lucas De Marchi Cc: Michal Wajdeczko Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240131025424.2087936-1-matthew.brost@intel.com (cherry picked from commit d83d8ae275c6bf87506b71b8a1acd98452137dc5) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 4 ++-- drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h | 4 ++-- .../drm/xe/abi/guc_communication_ctb_abi.h | 8 ++++---- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 6 +++--- drivers/gpu/drm/xe/abi/guc_messages_abi.h | 20 +++++++++---------- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index 3062e0e0d467e..79ba98a169f90 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -50,8 +50,8 @@ #define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) #define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffff << 16) -#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffff << 0) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY (0xffffu << 16) +#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN (0xffffu << 0) #define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32 GUC_HXG_REQUEST_MSG_n_DATAn #define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64 GUC_HXG_REQUEST_MSG_n_DATAn diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h index 811add10c30dc..c165e26c09766 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h @@ -242,8 +242,8 @@ struct slpc_shared_data { (HOST2GUC_PC_SLPC_REQUEST_REQUEST_MSG_MIN_LEN + \ HOST2GUC_PC_SLPC_EVENT_MAX_INPUT_ARGS) #define HOST2GUC_PC_SLPC_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xff << 8) -#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xff << 0) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ID (0xffu << 8) +#define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn #endif diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h index 3b83f907ece46..0b1146d0c997a 100644 --- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h @@ -82,11 +82,11 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64); #define GUC_CTB_HDR_LEN 1u #define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN #define GUC_CTB_MSG_MAX_LEN 256u -#define GUC_CTB_MSG_0_FENCE (0xffff << 16) -#define GUC_CTB_MSG_0_FORMAT (0xf << 12) +#define GUC_CTB_MSG_0_FENCE (0xffffu << 16) +#define GUC_CTB_MSG_0_FORMAT (0xfu << 12) #define GUC_CTB_FORMAT_HXG 0u -#define GUC_CTB_MSG_0_RESERVED (0xf << 8) -#define GUC_CTB_MSG_0_NUM_DWORDS (0xff << 0) +#define GUC_CTB_MSG_0_RESERVED (0xfu << 8) +#define GUC_CTB_MSG_0_NUM_DWORDS (0xffu << 0) /** * DOC: CTB HXG Message diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 47094b9b044cb..0400bc0fccdc9 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -31,9 +31,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h index 3d199016cf881..29e414c82d56c 100644 --- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h @@ -40,18 +40,18 @@ */ #define GUC_HXG_MSG_MIN_LEN 1u -#define GUC_HXG_MSG_0_ORIGIN (0x1 << 31) +#define GUC_HXG_MSG_0_ORIGIN (0x1u << 31) #define GUC_HXG_ORIGIN_HOST 0u #define GUC_HXG_ORIGIN_GUC 1u -#define GUC_HXG_MSG_0_TYPE (0x7 << 28) +#define GUC_HXG_MSG_0_TYPE (0x7u << 28) #define GUC_HXG_TYPE_REQUEST 0u #define GUC_HXG_TYPE_EVENT 1u #define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u #define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u #define GUC_HXG_TYPE_RESPONSE_FAILURE 6u #define GUC_HXG_TYPE_RESPONSE_SUCCESS 7u -#define GUC_HXG_MSG_0_AUX (0xfffffff << 0) -#define GUC_HXG_MSG_n_PAYLOAD (0xffffffff << 0) +#define GUC_HXG_MSG_0_AUX (0xfffffffu << 0) +#define GUC_HXG_MSG_n_PAYLOAD (0xffffffffu << 0) /** * DOC: HXG Request @@ -85,8 +85,8 @@ */ #define GUC_HXG_REQUEST_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_REQUEST_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_REQUEST_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -117,8 +117,8 @@ */ #define GUC_HXG_EVENT_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfff << 16) -#define GUC_HXG_EVENT_MSG_0_ACTION (0xffff << 0) +#define GUC_HXG_EVENT_MSG_0_DATA0 (0xfffu << 16) +#define GUC_HXG_EVENT_MSG_0_ACTION (0xffffu << 0) #define GUC_HXG_EVENT_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD /** @@ -188,8 +188,8 @@ */ #define GUC_HXG_FAILURE_MSG_LEN GUC_HXG_MSG_MIN_LEN -#define GUC_HXG_FAILURE_MSG_0_HINT (0xfff << 16) -#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffff << 0) +#define GUC_HXG_FAILURE_MSG_0_HINT (0xfffu << 16) +#define GUC_HXG_FAILURE_MSG_0_ERROR (0xffffu << 0) /** * DOC: HXG Response -- GitLab From 5f16ee27cd5abd5166e28b2311ac693c204063ff Mon Sep 17 00:00:00 2001 From: Badal Nilawar Date: Sat, 27 Jan 2024 22:20:40 +0530 Subject: [PATCH 0486/1405] drm/hwmon: Fix abi doc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes warnings in xe, i915 hwmon docs: Warning: /sys/devices/.../hwmon/hwmon/curr1_crit is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:35 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:52 Warning: /sys/devices/.../hwmon/hwmon/energy1_input is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:54 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:65 Warning: /sys/devices/.../hwmon/hwmon/in0_input is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:46 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:0 Warning: /sys/devices/.../hwmon/hwmon/power1_crit is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:22 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:39 Warning: /sys/devices/.../hwmon/hwmon/power1_max is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:0 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:8 Warning: /sys/devices/.../hwmon/hwmon/power1_max_interval is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:62 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:30 Warning: /sys/devices/.../hwmon/hwmon/power1_rated_max is defined 2 times: Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon:14 Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon:22 Use a path containing the driver name to differentiate the documentation of each entry. Fixes: fb1b70607f73 ("drm/xe/hwmon: Expose power attributes") Fixes: 92d44a422d0d ("drm/xe/hwmon: Expose card reactive critical power") Fixes: fbcdc9d3bf58 ("drm/xe/hwmon: Expose input voltage attribute") Fixes: 71d0a32524f9 ("drm/xe/hwmon: Expose hwmon energy attribute") Fixes: 4446fcf220ce ("drm/xe/hwmon: Expose power1_max_interval") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20240125113345.291118ff@canb.auug.org.au/ Signed-off-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Reviewed-by: Lucas De Marchi Acked-by: Rodrigo Vivi Acked-by: Jani Nikula Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240127165040.2348009-1-badal.nilawar@intel.com (cherry picked from commit 20485e3a810c480cef60caf53988619f61127e7b) Signed-off-by: Thomas Hellström --- .../ABI/testing/sysfs-driver-intel-i915-hwmon | 14 +++++++------- .../ABI/testing/sysfs-driver-intel-xe-hwmon | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon index 8d7d8f05f6cd0..92fe7c5c5ac1d 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon @@ -1,4 +1,4 @@ -What: /sys/devices/.../hwmon/hwmon/in0_input +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/in0_input Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -6,7 +6,7 @@ Description: RO. Current Voltage in millivolt. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_max Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -20,7 +20,7 @@ Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_rated_max +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_rated_max Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -28,7 +28,7 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max_interval +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_max_interval Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -37,7 +37,7 @@ Description: RW. Sustained power limit interval (Tau in PL1/Tau) in Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_crit +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/power1_crit Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -50,7 +50,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/curr1_crit +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/curr1_crit Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org @@ -63,7 +63,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes. Only supported for particular Intel i915 graphics platforms. -What: /sys/devices/.../hwmon/hwmon/energy1_input +What: /sys/bus/pci/drivers/i915/.../hwmon/hwmon/energy1_input Date: February 2023 KernelVersion: 6.2 Contact: intel-gfx@lists.freedesktop.org diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 8c321bc9dc044..023fd82de3f70 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -1,4 +1,4 @@ -What: /sys/devices/.../hwmon/hwmon/power1_max +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -12,7 +12,7 @@ Description: RW. Card reactive sustained (PL1) power limit in microwatts. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_rated_max +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_rated_max Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -20,7 +20,7 @@ Description: RO. Card default power limit (default TDP setting). Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_crit +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_crit Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -33,7 +33,7 @@ Description: RW. Card reactive critical (I1) power limit in microwatts. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/curr1_crit +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/curr1_crit Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -44,7 +44,7 @@ Description: RW. Card reactive critical (I1) power limit in milliamperes. the operating frequency if the power averaged over a window exceeds this limit. -What: /sys/devices/.../hwmon/hwmon/in0_input +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/in0_input Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -52,7 +52,7 @@ Description: RO. Current Voltage in millivolt. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/energy1_input +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/energy1_input Date: September 2023 KernelVersion: 6.5 Contact: intel-xe@lists.freedesktop.org @@ -60,7 +60,7 @@ Description: RO. Energy input of device in microjoules. Only supported for particular Intel xe graphics platforms. -What: /sys/devices/.../hwmon/hwmon/power1_max_interval +What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/power1_max_interval Date: October 2023 KernelVersion: 6.6 Contact: intel-xe@lists.freedesktop.org -- GitLab From 5f9ab17394f831cb7986ec50900fa37507a127f1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 0487/1405] firewire: core: correct documentation of fw_csr_string() kernel API Against its current description, the kernel API can accepts all types of directory entries. This commit corrects the documentation. Cc: stable@vger.kernel.org Fixes: 3c2c58cb33b3 ("firewire: core: fw_csr_string addendum") Link: https://lore.kernel.org/r/20240130100409.30128-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 0547253d16fe5..e4cb5106fb7d1 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ -- GitLab From 47dc55181dcbee69fc84c902e7fb060213b9b8a5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 1 Feb 2024 20:53:18 +0900 Subject: [PATCH 0488/1405] firewire: core: search descriptor leaf just after vendor directory entry in root directory It appears that Sony DVMC-DA1 has a quirk that the descriptor leaf entry locates just after the vendor directory entry in root directory. This is not conformant to the legacy layout of configuration ROM described in Configuration ROM for AV/C Devices 1.0 (1394 Trading Association, Dec 2000, TA Document 1999027). This commit changes current implementation to parse configuration ROM for device attributes so that the descriptor leaf entry can be detected for the vendor name. $ config-rom-pretty-printer < Sony-DVMC-DA1.img ROM header and bus information block ----------------------------------------------------------------- 1024 041ee7fb bus_info_length 4, crc_length 30, crc 59387 1028 31333934 bus_name "1394" 1032 e0644000 irmc 1, cmc 1, isc 1, bmc 0, cyc_clk_acc 100, max_rec 4 (32) 1036 08004603 company_id 080046 | 1040 0014193c device_id 12886219068 | EUI-64 576537731003586876 root directory ----------------------------------------------------------------- 1044 0006b681 directory_length 6, crc 46721 1048 03080046 vendor 1052 0c0083c0 node capabilities: per IEEE 1394 1056 8d00000a --> eui-64 leaf at 1096 1060 d1000003 --> unit directory at 1072 1064 c3000005 --> vendor directory at 1084 1068 8100000a --> descriptor leaf at 1108 unit directory at 1072 ----------------------------------------------------------------- 1072 0002cdbf directory_length 2, crc 52671 1076 1200a02d specifier id 1080 13010000 version vendor directory at 1084 ----------------------------------------------------------------- 1084 00020cfe directory_length 2, crc 3326 1088 17fa0000 model 1092 81000008 --> descriptor leaf at 1124 eui-64 leaf at 1096 ----------------------------------------------------------------- 1096 0002c66e leaf_length 2, crc 50798 1100 08004603 company_id 080046 | 1104 0014193c device_id 12886219068 | EUI-64 576537731003586876 descriptor leaf at 1108 ----------------------------------------------------------------- 1108 00039e26 leaf_length 3, crc 40486 1112 00000000 textual descriptor 1116 00000000 minimal ASCII 1120 536f6e79 "Sony" descriptor leaf at 1124 ----------------------------------------------------------------- 1124 0005001d leaf_length 5, crc 29 1128 00000000 textual descriptor 1132 00000000 minimal ASCII 1136 44564d43 "DVMC" 1140 2d444131 "-DA1" 1144 00000000 Suggested-by: Adam Goldman Tested-by: Adam Goldman Link: https://lore.kernel.org/r/20240130100409.30128-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index e4cb5106fb7d1..7d3346b3a2bf3 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -367,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev, for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) { int result = fw_csr_string(directories[i], attr->key, buf, bufsize); // Detected. - if (result >= 0) + if (result >= 0) { ret = result; + } else if (i == 0 && attr->key == CSR_VENDOR) { + // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry + // in the root directory follows to the directory entry for vendor ID + // instead of the immediate value for vendor ID. + result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf, + bufsize); + if (result >= 0) + ret = result; + } } if (ret >= 0) { -- GitLab From 83b3836bf83f09beea5f592b126cfdd1bc921e48 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jan 2024 12:12:53 -0400 Subject: [PATCH 0489/1405] iommu: Allow ops->default_domain to work when !CONFIG_IOMMU_DMA The ops->default_domain flow used a 0 req_type to select the default domain and this was enforced by iommu_group_alloc_default_domain(). When !CONFIG_IOMMU_DMA started forcing the old ARM32 drivers into IDENTITY it also overroad the 0 req_type of the ops->default_domain drivers to IDENTITY which ends up causing failures during device probe. Make iommu_group_alloc_default_domain() accept a req_type that matches the ops->default_domain and have iommu_group_alloc_default_domain() generate a req_type that matches the default_domain. This way the req_type always describes what kind of domain should be attached and ops->default_domain overrides all other mechanisms to choose the default domain. Fixes: 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") Fixes: 0f6a90436a57 ("iommu: Do not use IOMMU_DOMAIN_DMA if CONFIG_IOMMU_DMA is not enabled") Reported-by: Ovidiu Panait Closes: https://lore.kernel.org/linux-iommu/20240123165829.630276-1-ovidiu.panait@windriver.com/ Reported-by: Shivaprasad G Bhat Closes: https://lore.kernel.org/linux-iommu/170618452753.3805.4425669653666211728.stgit@ltcd48-lp2.aus.stglab.ibm.com/ Tested-by: Ovidiu Panait Tested-by: Shivaprasad G Bhat Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-755bd21c4a64+525b8-iommu_def_dom_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 68e648b557670..d14413916f93a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1799,7 +1799,7 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) * domain. Do not use in new drivers. */ if (ops->default_domain) { - if (req_type) + if (req_type != ops->default_domain->type) return ERR_PTR(-EINVAL); return ops->default_domain; } @@ -1871,10 +1871,18 @@ static int iommu_get_def_domain_type(struct iommu_group *group, const struct iommu_ops *ops = dev_iommu_ops(dev); int type; - if (!ops->def_domain_type) - return cur_type; - - type = ops->def_domain_type(dev); + if (ops->default_domain) { + /* + * Drivers that declare a global static default_domain will + * always choose that. + */ + type = ops->default_domain->type; + } else { + if (ops->def_domain_type) + type = ops->def_domain_type(dev); + else + return cur_type; + } if (!type || cur_type == type) return cur_type; if (!cur_type) -- GitLab From fae6e669cdc52fdbb843e7fb1b8419642b6b8cba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 30 Jan 2024 12:14:54 -0400 Subject: [PATCH 0490/1405] drm/tegra: Do not assume that a NULL domain means no DMA IOMMU Previously with tegra-smmu, even with CONFIG_IOMMU_DMA, the default domain could have been left as NULL. The NULL domain is specially recognized by host1x_client_iommu_attach() as meaning it is not the DMA domain and should be replaced with the special shared domain. This happened prior to the below commit because tegra-smmu was using the NULL domain to mean IDENTITY. Now that the domain is properly labled the test in DRM doesn't see NULL. Check for IDENTITY as well to enable the special domains. Fixes: c8cc2655cc6c ("iommu/tegra-smmu: Implement an IDENTITY domain") Reported-by: diogo.ivo@tecnico.ulisboa.pt Closes: https://lore.kernel.org/all/bbmhcoghrprmbdibnjum6lefix2eoquxrde7wyqeulm4xabmlm@b6jy32saugqh/ Tested-by: diogo.ivo@tecnico.ulisboa.pt Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-3049f92c4812+16691-host1x_def_dom_fix_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/gpu/drm/tegra/drm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb70..a73cff7a30708 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -960,7 +960,8 @@ int host1x_client_iommu_attach(struct host1x_client *client) * not the shared IOMMU domain, don't try to attach it to a different * domain. This allows using the IOMMU-backed DMA API. */ - if (domain && domain != tegra->domain) + if (domain && domain->type != IOMMU_DOMAIN_IDENTITY && + domain != tegra->domain) return 0; if (tegra->domain) { -- GitLab From f66556c1333b3bd4806fc98ee07c419ab545e6ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 28 Dec 2023 22:43:25 +0100 Subject: [PATCH 0491/1405] media: atomisp: Adjust for v4l2_subdev_state handling changes in 6.8 The atomisp driver emulates a standard v4l2 device, which also works for non media-controller aware applications. Part of this requires making try_fmt calls on the sensor when a normal v4l2 app is making try_fmt calls on the /dev/video# mode. With the recent v4l2_subdev_state handling changes in 6.8 this no longer works, fixing this requires 2 changes: 1. The atomisp code was using its own internal v4l2_subdev_pad_config for this. Replace the internal v4l2_subdev_pad_config with allocating a full v4l2_subdev_state for storing the full try_fmt state. 2. The paths actually setting the fmt or crop selection now need to be passed the v4l2_subdev's active state, so that sensor drivers which are using the v4l2_subdev's active state to store their state keep working. Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab --- .../staging/media/atomisp/pci/atomisp_cmd.c | 58 ++++++++++-------- .../media/atomisp/pci/atomisp_internal.h | 4 +- .../staging/media/atomisp/pci/atomisp_ioctl.c | 52 +++++++++------- .../staging/media/atomisp/pci/atomisp_v4l2.c | 59 ++++++++++++++----- 4 files changed, 111 insertions(+), 62 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c index f44e6412f4e31..d0db2efe00452 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c @@ -3723,12 +3723,10 @@ void atomisp_get_padding(struct atomisp_device *isp, u32 width, u32 height, static int atomisp_set_crop(struct atomisp_device *isp, const struct v4l2_mbus_framefmt *format, + struct v4l2_subdev_state *sd_state, int which) { struct atomisp_input_subdev *input = &isp->inputs[isp->asd.input_curr]; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_selection sel = { .which = which, .target = V4L2_SEL_TGT_CROP, @@ -3754,7 +3752,7 @@ static int atomisp_set_crop(struct atomisp_device *isp, sel.r.left = ((input->native_rect.width - sel.r.width) / 2) & ~1; sel.r.top = ((input->native_rect.height - sel.r.height) / 2) & ~1; - ret = v4l2_subdev_call(input->camera, pad, set_selection, &pad_state, &sel); + ret = v4l2_subdev_call(input->camera, pad, set_selection, sd_state, &sel); if (ret) dev_err(isp->dev, "Error setting crop to %ux%u @%ux%u: %d\n", sel.r.width, sel.r.height, sel.r.left, sel.r.top, ret); @@ -3770,9 +3768,6 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f, const struct atomisp_format_bridge *fmt, *snr_fmt; struct atomisp_sub_device *asd = &isp->asd; struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_format format = { .which = V4L2_SUBDEV_FORMAT_TRY, }; @@ -3809,11 +3804,16 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f, dev_dbg(isp->dev, "try_mbus_fmt: asking for %ux%u\n", format.format.width, format.format.height); - ret = atomisp_set_crop(isp, &format.format, V4L2_SUBDEV_FORMAT_TRY); - if (ret) - return ret; + v4l2_subdev_lock_state(input->try_sd_state); + + ret = atomisp_set_crop(isp, &format.format, input->try_sd_state, + V4L2_SUBDEV_FORMAT_TRY); + if (ret == 0) + ret = v4l2_subdev_call(input->camera, pad, set_fmt, + input->try_sd_state, &format); + + v4l2_subdev_unlock_state(input->try_sd_state); - ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &format); if (ret) return ret; @@ -4238,9 +4238,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p struct atomisp_device *isp = asd->isp; struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; const struct atomisp_format_bridge *format; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; + struct v4l2_subdev_state *act_sd_state; struct v4l2_subdev_format vformat = { .which = V4L2_SUBDEV_FORMAT_TRY, }; @@ -4268,12 +4266,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p /* Disable dvs if resolution can't be supported by sensor */ if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) { - ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_TRY); - if (ret) - return ret; + v4l2_subdev_lock_state(input->try_sd_state); + + ret = atomisp_set_crop(isp, &vformat.format, input->try_sd_state, + V4L2_SUBDEV_FORMAT_TRY); + if (ret == 0) { + vformat.which = V4L2_SUBDEV_FORMAT_TRY; + ret = v4l2_subdev_call(input->camera, pad, set_fmt, + input->try_sd_state, &vformat); + } + + v4l2_subdev_unlock_state(input->try_sd_state); - vformat.which = V4L2_SUBDEV_FORMAT_TRY; - ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &vformat); if (ret) return ret; @@ -4291,12 +4295,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p } } - ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_ACTIVE); - if (ret) - return ret; + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + + ret = atomisp_set_crop(isp, &vformat.format, act_sd_state, + V4L2_SUBDEV_FORMAT_ACTIVE); + if (ret == 0) { + vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE; + ret = v4l2_subdev_call(input->camera, pad, set_fmt, act_sd_state, &vformat); + } + + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); - vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE; - ret = v4l2_subdev_call(input->camera, pad, set_fmt, NULL, &vformat); if (ret) return ret; diff --git a/drivers/staging/media/atomisp/pci/atomisp_internal.h b/drivers/staging/media/atomisp/pci/atomisp_internal.h index f7b4bee9574bd..d5b077e602cae 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_internal.h +++ b/drivers/staging/media/atomisp/pci/atomisp_internal.h @@ -132,8 +132,8 @@ struct atomisp_input_subdev { /* Sensor rects for sensors which support crop */ struct v4l2_rect native_rect; struct v4l2_rect active_rect; - /* Sensor pad_cfg for which == V4L2_SUBDEV_FORMAT_TRY calls */ - struct v4l2_subdev_pad_config pad_cfg; + /* Sensor state for which == V4L2_SUBDEV_FORMAT_TRY calls */ + struct v4l2_subdev_state *try_sd_state; struct v4l2_subdev *motor; diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c index 01b7fa9b56a21..5b2d88c02d36a 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c +++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c @@ -781,12 +781,20 @@ static int atomisp_enum_framesizes(struct file *file, void *priv, .which = V4L2_SUBDEV_FORMAT_ACTIVE, .code = input->code, }; + struct v4l2_subdev_state *act_sd_state; int ret; + if (!input->camera) + return -EINVAL; + if (input->crop_support) return atomisp_enum_framesizes_crop(isp, fsize); - ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse); + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, + act_sd_state, &fse); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); if (ret) return ret; @@ -803,18 +811,25 @@ static int atomisp_enum_frameintervals(struct file *file, void *priv, struct video_device *vdev = video_devdata(file); struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; struct v4l2_subdev_frame_interval_enum fie = { - .code = atomisp_in_fmt_conv[0].code, + .code = atomisp_in_fmt_conv[0].code, .index = fival->index, .width = fival->width, .height = fival->height, .which = V4L2_SUBDEV_FORMAT_ACTIVE, }; + struct v4l2_subdev_state *act_sd_state; int ret; - ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera, - pad, enum_frame_interval, NULL, - &fie); + if (!input->camera) + return -EINVAL; + + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_frame_interval, + act_sd_state, &fie); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); if (ret) return ret; @@ -830,30 +845,25 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh, struct video_device *vdev = video_devdata(file); struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; struct v4l2_subdev_mbus_code_enum code = { .which = V4L2_SUBDEV_FORMAT_ACTIVE, }; const struct atomisp_format_bridge *format; - struct v4l2_subdev *camera; + struct v4l2_subdev_state *act_sd_state; unsigned int i, fi = 0; - int rval; + int ret; - camera = isp->inputs[asd->input_curr].camera; - if(!camera) { - dev_err(isp->dev, "%s(): camera is NULL, device is %s\n", - __func__, vdev->name); + if (!input->camera) return -EINVAL; - } - rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code); - if (rval == -ENOIOCTLCMD) { - dev_warn(isp->dev, - "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n", - camera->name); - } - - if (rval) - return rval; + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_mbus_code, + act_sd_state, &code); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); + if (ret) + return ret; for (i = 0; i < ARRAY_SIZE(atomisp_output_fmts); i++) { format = &atomisp_output_fmts[i]; diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c index c1c8501ec61f5..547e1444ad973 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c @@ -862,6 +862,9 @@ static void atomisp_unregister_entities(struct atomisp_device *isp) v4l2_device_unregister(&isp->v4l2_dev); media_device_unregister(&isp->media_dev); media_device_cleanup(&isp->media_dev); + + for (i = 0; i < isp->input_cnt; i++) + __v4l2_subdev_state_free(isp->inputs[i].try_sd_state); } static int atomisp_register_entities(struct atomisp_device *isp) @@ -933,32 +936,49 @@ static int atomisp_register_entities(struct atomisp_device *isp) static void atomisp_init_sensor(struct atomisp_input_subdev *input) { + static struct lock_class_key try_sd_state_key; struct v4l2_subdev_mbus_code_enum mbus_code_enum = { }; struct v4l2_subdev_frame_size_enum fse = { }; - struct v4l2_subdev_state sd_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_selection sel = { }; + struct v4l2_subdev_state *try_sd_state, *act_sd_state; int i, err; + /* + * FIXME: Drivers are not supposed to use __v4l2_subdev_state_alloc() + * but atomisp needs this for try_fmt on its /dev/video# node since + * it emulates a normal v4l2 device there, passing through try_fmt / + * set_fmt to the sensor. + */ + try_sd_state = __v4l2_subdev_state_alloc(input->camera, + "atomisp:try_sd_state->lock", &try_sd_state_key); + if (IS_ERR(try_sd_state)) + return; + + input->try_sd_state = try_sd_state; + + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + mbus_code_enum.which = V4L2_SUBDEV_FORMAT_ACTIVE; - err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, NULL, &mbus_code_enum); + err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, + act_sd_state, &mbus_code_enum); if (!err) input->code = mbus_code_enum.code; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_NATIVE_SIZE; - err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, get_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; input->native_rect = sel.r; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_CROP_DEFAULT; - err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, get_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; input->active_rect = sel.r; @@ -973,7 +993,8 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) fse.code = input->code; fse.which = V4L2_SUBDEV_FORMAT_ACTIVE; - err = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse); + err = v4l2_subdev_call(input->camera, pad, enum_frame_size, + act_sd_state, &fse); if (err) break; @@ -989,22 +1010,26 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) * for padding, set the crop rect to cover the entire sensor instead * of only the default active area. * - * Do this for both try and active formats since the try_crop rect in - * pad_cfg may influence (clamp) future try_fmt calls with which == try. + * Do this for both try and active formats since the crop rect in + * try_sd_state may influence (clamp size) in calls with which == try. */ sel.which = V4L2_SUBDEV_FORMAT_TRY; sel.target = V4L2_SEL_TGT_CROP; sel.r = input->native_rect; - err = v4l2_subdev_call(input->camera, pad, set_selection, &sd_state, &sel); + v4l2_subdev_lock_state(input->try_sd_state); + err = v4l2_subdev_call(input->camera, pad, set_selection, + input->try_sd_state, &sel); + v4l2_subdev_unlock_state(input->try_sd_state); if (err) - return; + goto unlock_act_sd_state; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_CROP; sel.r = input->native_rect; - err = v4l2_subdev_call(input->camera, pad, set_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, set_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; dev_info(input->camera->dev, "Supports crop native %dx%d active %dx%d binning %d\n", input->native_rect.width, input->native_rect.height, @@ -1012,6 +1037,10 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) input->binning_support); input->crop_support = true; + +unlock_act_sd_state: + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); } int atomisp_register_device_nodes(struct atomisp_device *isp) -- GitLab From 6a9d552483d50953320b9d3b57abdee8d436f23f Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 13 Apr 2023 10:50:32 +0200 Subject: [PATCH 0492/1405] media: rc: bpf attach/detach requires write permission Note that bpf attach/detach also requires CAP_NET_ADMIN. Cc: stable@vger.kernel.org Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/bpf-lirc.c | 6 +++--- drivers/media/rc/lirc_dev.c | 5 ++++- drivers/media/rc/rc-core-priv.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e810..52d82cbe7685f 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a537734832c50..caad59f76793f 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7fc..7df949fc65e2b 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} -- GitLab From dc9ceb90c4b42c6e5c6757df1d6257110433788e Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Wed, 17 Jan 2024 09:14:19 +0100 Subject: [PATCH 0493/1405] media: ir_toy: fix a memleak in irtoy_tx When irtoy_command fails, buf should be freed since it is allocated by irtoy_tx, or there is a memleak. Fixes: 4114978dcd24 ("media: ir_toy: prevent device from hanging during transmit") Signed-off-by: Zhipeng Lu Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/ir_toy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 1968067092594..69e630d85262f 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } -- GitLab From 346c84e281a963437b9fe9dfcd92c531630289de Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 30 Jan 2024 09:55:25 +0100 Subject: [PATCH 0494/1405] media: pwm-ir-tx: Depend on CONFIG_HIGH_RES_TIMERS Since commit 363d0e56285e ("media: pwm-ir-tx: Trigger edges from hrtimer interrupt context"), pwm-ir-tx uses high resolution timers for IR signal generation when the pwm can be used from atomic context. Ensure they are available. Fixes: 363d0e56285e ("media: pwm-ir-tx: Trigger edges from hrtimer interrupt context") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig index 2afe67ffa285e..74d69ce22a33e 100644 --- a/drivers/media/rc/Kconfig +++ b/drivers/media/rc/Kconfig @@ -319,6 +319,7 @@ config IR_PWM_TX tristate "PWM IR transmitter" depends on LIRC depends on PWM + depends on HIGH_RES_TIMERS depends on OF help Say Y if you want to use a PWM based IR transmitter. This is -- GitLab From 3657e4cb5a8abd9edf6c944e022fe9ef06989960 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:20 +0000 Subject: [PATCH 0495/1405] ASoC: wm_adsp: Fix firmware file search order Check for the cases of system-specific bin file without a wmfw before falling back to looking for a generic wmfw. All system-specific options should be tried before falling back to loading a generic wmfw/bin. With the original code, the presence of a fallback generic wmfw on the filesystem would prevent using a system-specific tuning with a ROM firmware. Signed-off-by: Richard Fitzgerald Fixes: 0e7d82cbea8b ("ASoC: wm_adsp: Add support for loading bin files without wmfw") Link: https://msgid.link/r/20240129162737.497-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 44 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index c01e31175015c..bd60ceebb6a9c 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -823,6 +823,23 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } } + /* Check system-specific bin without wmfw before falling back to generic */ + if (dsp->wmfw_optional && system_name) { + if (asoc_component_prefix) + wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, + cirrus_dir, system_name, + asoc_component_prefix, "bin"); + + if (!*coeff_firmware) + wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, + cirrus_dir, system_name, + NULL, "bin"); + + if (*coeff_firmware) + return 0; + } + + /* Check legacy location */ if (!wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, "", NULL, NULL, "wmfw")) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, @@ -830,38 +847,15 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, return 0; } + /* Fall back to generic wmfw and optional matching bin */ ret = wm_adsp_request_firmware_file(dsp, wmfw_firmware, wmfw_filename, cirrus_dir, NULL, NULL, "wmfw"); - if (!ret) { + if (!ret || dsp->wmfw_optional) { wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, cirrus_dir, NULL, NULL, "bin"); return 0; } - if (dsp->wmfw_optional) { - if (system_name) { - if (asoc_component_prefix) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, system_name, - asoc_component_prefix, "bin"); - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, system_name, - NULL, "bin"); - } - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - "", NULL, NULL, "bin"); - - if (!*coeff_firmware) - wm_adsp_request_firmware_file(dsp, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); - - return 0; - } - adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); -- GitLab From daf3f0f99cde93a066240462b7a87cdfeedc04c0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:21 +0000 Subject: [PATCH 0496/1405] ASoC: wm_adsp: Don't overwrite fwf_name with the default There's no need to overwrite fwf_name with a kstrdup() of the cs_dsp part name. It is trivial to select either fwf_name or cs_dsp.part as the string to use when building the filename in wm_adsp_request_firmware_file(). This leaves fwf_name entirely owned by the codec driver. It also avoids problems with freeing the pointer. With the original code fwf_name was either a pointer owned by the codec driver, or a kstrdup() created by wm_adsp. This meant wm_adsp must free it if it set it, but not if the codec driver set it. The code was handling this by using devm_kstrdup(). But there is no absolute requirement that wm_adsp_common_init() must be called from probe(), so this was a pseudo-memory leak - each new call to wm_adsp_common_init() would allocate another block of memory but these would only be freed if the owning codec driver was removed. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240129162737.497-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index bd60ceebb6a9c..36ea0dcdc7ab0 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -739,19 +739,25 @@ static int wm_adsp_request_firmware_file(struct wm_adsp *dsp, const char *filetype) { struct cs_dsp *cs_dsp = &dsp->cs_dsp; + const char *fwf; char *s, c; int ret = 0; + if (dsp->fwf_name) + fwf = dsp->fwf_name; + else + fwf = dsp->cs_dsp.name; + if (system_name && asoc_component_prefix) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix, filetype); else if (system_name) *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s-%s.%s", dir, dsp->part, - dsp->fwf_name, wm_adsp_fw[dsp->fw].file, system_name, + fwf, wm_adsp_fw[dsp->fw].file, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, dsp->fwf_name, + *filename = kasprintf(GFP_KERNEL, "%s%s-%s-%s.%s", dir, dsp->part, fwf, wm_adsp_fw[dsp->fw].file, filetype); if (*filename == NULL) @@ -857,29 +863,18 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp, } adsp_err(dsp, "Failed to request firmware <%s>%s-%s-%s<-%s<%s>>.wmfw\n", - cirrus_dir, dsp->part, dsp->fwf_name, wm_adsp_fw[dsp->fw].file, - system_name, asoc_component_prefix); + cirrus_dir, dsp->part, + dsp->fwf_name ? dsp->fwf_name : dsp->cs_dsp.name, + wm_adsp_fw[dsp->fw].file, system_name, asoc_component_prefix); return -ENOENT; } static int wm_adsp_common_init(struct wm_adsp *dsp) { - char *p; - INIT_LIST_HEAD(&dsp->compr_list); INIT_LIST_HEAD(&dsp->buffer_list); - if (!dsp->fwf_name) { - p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL); - if (!p) - return -ENOMEM; - - dsp->fwf_name = p; - for (; *p != 0; ++p) - *p = tolower(*p); - } - return 0; } -- GitLab From ae861c466ee57e15a29d97629e1c564e3f714a4f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:22 +0000 Subject: [PATCH 0497/1405] ASoC: cs35l56: cs35l56_component_remove() must clear cs35l56->component The cs35l56->component pointer is used by the suspend-resume handling to know whether the driver is fully instantiated. This is to prevent it queuing dsp_work which would result in calling wm_adsp when the driver is not an instantiated ASoC component. So this pointer must be cleared by cs35l56_component_remove(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 45b4de3eff94f..09944db4db30d 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -809,6 +809,8 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); + + cs35l56->component = NULL; } static int cs35l56_set_bias_level(struct snd_soc_component *component, -- GitLab From cd38ccbecdace1469b4e0cfb3ddeec72a3fad226 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:23 +0000 Subject: [PATCH 0498/1405] ASoC: cs35l56: cs35l56_component_remove() must clean up wm_adsp cs35l56_component_remove() must call wm_adsp_power_down() and wm_adsp2_component_remove(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 09944db4db30d..491da77112c34 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -810,6 +810,11 @@ static void cs35l56_component_remove(struct snd_soc_component *component) cancel_work_sync(&cs35l56->dsp_work); + if (cs35l56->dsp.cs_dsp.booted) + wm_adsp_power_down(&cs35l56->dsp); + + wm_adsp2_component_remove(&cs35l56->dsp, component); + cs35l56->component = NULL; } -- GitLab From 07687cd0539f8185b6ba0c0afba8473517116d6a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:24 +0000 Subject: [PATCH 0499/1405] ASoC: cs35l56: Don't add the same register patch multiple times Move the call to cs35l56_set_patch() earlier in cs35l56_init() so that it only adds the register patch on first-time initialization. The call was after the post_soft_reset label, so every time this function was run to re-initialize the hardware after a reset it would call regmap_register_patch() and add the same reg_sequence again. Signed-off-by: Richard Fitzgerald Fixes: 898673b905b9 ("ASoC: cs35l56: Move shared data into a common data structure") Link: https://msgid.link/r/20240129162737.497-6-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 491da77112c34..ea5d2b2eb82a0 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1159,6 +1159,10 @@ int cs35l56_init(struct cs35l56_private *cs35l56) if (ret < 0) return ret; + ret = cs35l56_set_patch(&cs35l56->base); + if (ret) + return ret; + /* Populate the DSP information with the revision and security state */ cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x", cs35l56->base.secured ? "s" : "", cs35l56->base.rev); @@ -1197,10 +1201,6 @@ int cs35l56_init(struct cs35l56_private *cs35l56) if (ret) return ret; - ret = cs35l56_set_patch(&cs35l56->base); - if (ret) - return ret; - /* Registers could be dirty after soft reset or SoundWire enumeration */ regcache_sync(cs35l56->base.regmap); -- GitLab From 3739cc0733ba7eeafc08d4d4208d1f3c2451eabd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:25 +0000 Subject: [PATCH 0500/1405] ASoC: cs35l56: Remove buggy checks from cs35l56_is_fw_reload_needed() Remove the check of fw_patched from cs35l56_is_fw_reload_needed(). Also remove the redundant check for control of the reset GPIO. The fw_patched flag is set when cs35l56_dsp_work() has completed its steps to download firmware and power-up wm_adsp. There was a check in cs35l56_is_fw_reload_needed() to make a quick exit of 'false' if !fw_patched. The original idea was that the system might be suspended before the driver has ever made any attempt to download firmware, and in that case the driver doesn't need to return to a patched state because it was never in a patched state. This check of fw_patched is buggy because it prevented ever recovering from a failed patch. If a previous attempt to patch and reboot the silicon had failed it would leave fw_patched==false. This would mean the driver never attempted another download even though the fault may have been cleared (by a hard reset, for example). It is also a redundant check because the calling code already makes a quick exit if cs35l56_component_probe() has not been called, which deals with the original intent of this check but in a safer way. The check for reset GPIO is redundant: if the silicon was hard-reset the FIRMWARE_MISSING flag will be 1. But this check created an expectation that the suspend/resume code toggles reset. This can't easily be protected against accidental code breakage. The only reason for the check was to skip runtime-resuming the driver to read the PROTECTION_STATUS register when it already knows it reset the silicon. But in that case the driver will have to be runtime-resumed to do the firmware download. So it created an assumption for no benefit. Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240129162737.497-7-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 953ba066bab1e..0cd572de73a90 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -400,17 +400,6 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base) unsigned int val; int ret; - /* Nothing to re-patch if we haven't done any patching yet. */ - if (!cs35l56_base->fw_patched) - return false; - - /* - * If we have control of RESET we will have asserted it so the firmware - * will need re-patching. - */ - if (cs35l56_base->reset_gpio) - return true; - /* * In secure mode FIRMWARE_MISSING is cleared by the BIOS loader so * can't be used here to test for memory retention. -- GitLab From 72a77d7631c6e392677c0134343cf5edcd3a4572 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:26 +0000 Subject: [PATCH 0501/1405] ASoC: cs35l56: Fix to ensure ASP1 registers match cache Add a dummy SUPPLY widget connected to the ASP that forces the chip registers to match the regmap cache when the ASP is powered-up. On a SoundWire system the ASP is free for use as a chip-to-chip interconnect. This can be either for the firmware on multiple CS35L56 to share reference audio; or as a bridge to another device. If it is a firmware interconnect it is owned by the firmware and the Linux driver should avoid writing the registers. However. If it is a bridge then Linux may take over and handle it as a normal codec-to-codec link. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So if the ASP is being used on a SoundWire system the firmware sets up the ASP registers. This means that we can't assume the default state of the ASP registers. But we don't know the initial state that the firmware set them to until after the firmware has been downloaded and booted, which can take several seconds when downloading multiple amps. To avoid blocking probe() for several seconds waiting for the firmware, the silicon defaults are assumed. This allows the machine driver to setup the ASP configuration during probe() without being blocked. If the ASP is hooked up and used, the SUPPLY widget ensures that the chip registers match what was configured in the regmap cache. If the machine driver does not hook up the ASP, it is assumed that it won't call any functions to configure the ASP DAI. Therefore the regmap cache will be clean for these registers so a regcache_sync() will not overwrite the chip registers. If the DAI is not hooked up, the dummy SUPPLY widget will not be invoked so it will never force-overwrite the chip registers. Backport note: This won't apply cleanly to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-8-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 41 +++++++++++++++++++++++++++++++ sound/soc/codecs/cs35l56.c | 21 ++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 8c18e8b6d27d2..4db36c893d9d2 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -272,6 +272,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); +int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 0cd572de73a90..35789ffc63af0 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -195,6 +195,47 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg) } } +/* + * The firmware boot sequence can overwrite the ASP1 config registers so that + * they don't match regmap's view of their values. Rewrite the values from the + * regmap cache into the hardware registers. + */ +int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) +{ + struct reg_sequence asp1_regs[] = { + { .reg = CS35L56_ASP1_ENABLES1 }, + { .reg = CS35L56_ASP1_CONTROL1 }, + { .reg = CS35L56_ASP1_CONTROL2 }, + { .reg = CS35L56_ASP1_CONTROL3 }, + { .reg = CS35L56_ASP1_FRAME_CONTROL1 }, + { .reg = CS35L56_ASP1_FRAME_CONTROL5 }, + { .reg = CS35L56_ASP1_DATA_CONTROL1 }, + { .reg = CS35L56_ASP1_DATA_CONTROL5 }, + }; + int i, ret; + + /* Read values from regmap cache into a write sequence */ + for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) { + ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def); + if (ret) + goto err; + } + + /* Write the values cache-bypassed so that they will be written to silicon */ + ret = regmap_multi_reg_write_bypassed(cs35l56_base->regmap, asp1_regs, + ARRAY_SIZE(asp1_regs)); + if (ret) + goto err; + + return 0; + +err: + dev_err(cs35l56_base->dev, "Failed to sync ASP1 registers: %d\n", ret); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_force_sync_asp1_registers_from_cache, SND_SOC_CS35L56_SHARED); + int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command) { unsigned int val; diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index ea5d2b2eb82a0..41aa79848b159 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -148,6 +148,21 @@ static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx4_enum, static const struct snd_kcontrol_new sdw1_tx4_mux = SOC_DAPM_ENUM("SDW1TX4 SRC", cs35l56_sdw1tx4_enum); +static int cs35l56_asp1_cfg_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + + switch (event) { + case SND_SOC_DAPM_PRE_PMU: + /* Override register values set by firmware boot */ + return cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + default: + return 0; + } +} + static int cs35l56_play_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -184,6 +199,9 @@ static const struct snd_soc_dapm_widget cs35l56_dapm_widgets[] = { SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_B", 0, 0), SND_SOC_DAPM_REGULATOR_SUPPLY("VDD_AMP", 0, 0), + SND_SOC_DAPM_SUPPLY("ASP1 CFG", SND_SOC_NOPM, 0, 0, cs35l56_asp1_cfg_event, + SND_SOC_DAPM_PRE_PMU), + SND_SOC_DAPM_SUPPLY("PLAY", SND_SOC_NOPM, 0, 0, cs35l56_play_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), @@ -251,6 +269,9 @@ static const struct snd_soc_dapm_route cs35l56_audio_map[] = { { "AMP", NULL, "VDD_B" }, { "AMP", NULL, "VDD_AMP" }, + { "ASP1 Playback", NULL, "ASP1 CFG" }, + { "ASP1 Capture", NULL, "ASP1 CFG" }, + { "ASP1 Playback", NULL, "PLAY" }, { "SDW1 Playback", NULL, "PLAY" }, -- GitLab From 782e6c538be43a17e34f552ab49e8c713cac7883 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:27 +0000 Subject: [PATCH 0502/1405] ASoC: cs35l56: Fix default SDW TX mixer registers Patch the SDW TX mixer registers to silicon defaults. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So the firmware sets up the SDW TX mixer registers to whatever audio is relevant on a specific system. This means that the driver cannot assume the initial values of these registers. But Linux has ALSA controls to configure routing, so the registers can be patched to silicon default and the ALSA controls used to select what audio to feed back to the host capture path. Backport note: This won't apply to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-9-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 35789ffc63af0..a812abf90836e 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -12,6 +12,15 @@ #include "cs35l56.h" static const struct reg_sequence cs35l56_patch[] = { + /* + * Firmware can change these to non-defaults to satisfy SDCA. + * Ensure that they are at known defaults. + */ + { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, + { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, + { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, + { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, { CS35L56_MAIN_RENDER_USER_VOLUME, 0x00000000 }, -- GitLab From 856ce8982169acb31a25c5f2ecd2570ab8a6af46 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:28 +0000 Subject: [PATCH 0503/1405] ALSA: hda: cs35l56: Initialize all ASP1 registers Add ASP1_FRAME_CONTROL1, ASP1_FRAME_CONTROL5 and the ASP1_TX?_INPUT registers to the sequence used to initialize the ASP configuration. Write this sequence to the cache and directly to the registers to ensure that they match. A system-specific firmware can patch these registers to values that are not the silicon default, so that the CS35L56 boots already in the configuration used by Windows or by "driverless" Windows setups such as factory tuning. These may not match how Linux is configuring the HDA codec. And anyway on Linux the ALSA controls are used to configure routing options. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-10-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index b61e1de8c4bf9..f22bcb104a4eb 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -30,14 +30,23 @@ * ASP1_RX_WL = 24 bits per sample * ASP1_TX_WL = 24 bits per sample * ASP1_RXn_EN 1..3 and ASP1_TXn_EN 1..4 disabled + * + * Override any Windows-specific mixer settings applied by the firmware. */ static const struct reg_sequence cs35l56_hda_dai_config[] = { { CS35L56_ASP1_CONTROL1, 0x00000021 }, { CS35L56_ASP1_CONTROL2, 0x20200200 }, { CS35L56_ASP1_CONTROL3, 0x00000003 }, + { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 }, + { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, { CS35L56_ASP1_ENABLES1, 0x00000000 }, + { CS35L56_ASP1TX1_INPUT, 0x00000018 }, + { CS35L56_ASP1TX2_INPUT, 0x00000019 }, + { CS35L56_ASP1TX3_INPUT, 0x00000020 }, + { CS35L56_ASP1TX4_INPUT, 0x00000028 }, + }; static void cs35l56_hda_play(struct cs35l56_hda *cs35l56) @@ -133,6 +142,10 @@ static int cs35l56_hda_runtime_resume(struct device *dev) } } + ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + if (ret) + goto err; + return 0; err: @@ -976,6 +989,9 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int id) regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config, ARRAY_SIZE(cs35l56_hda_dai_config)); + ret = cs35l56_force_sync_asp1_registers_from_cache(&cs35l56->base); + if (ret) + goto err; /* * By default only enable one ASP1TXn, where n=amplifier index, -- GitLab From 07f7d6e7a124d3e4de36771e2a4926d0e31c2258 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:29 +0000 Subject: [PATCH 0504/1405] ASoC: cs35l56: Fix for initializing ASP1 mixer registers Defer initializing the state of the ASP1 mixer registers until the firmware has been downloaded and rebooted. On a SoundWire system the ASP is free for use as a chip-to-chip interconnect. This can be either for the firmware on multiple CS35L56 to share reference audio; or as a bridge to another device. If it is a firmware interconnect it is owned by the firmware and the Linux driver should avoid writing the registers. However, if it is a bridge then Linux may take over and handle it as a normal codec-to-codec link. Even if the ASP is used as a firmware-firmware interconnect it is useful to have ALSA controls for the ASP mixer. They are at least useful for debugging. CS35L56 is designed for SDCA and a generic SDCA driver would know nothing about these chip-specific registers. So if the ASP is being used on a SoundWire system the firmware sets up the ASP mixer registers. This means that we can't assume the default state of these registers. But we don't know the initial state that the firmware set them to until after the firmware has been downloaded and booted, which can take several seconds when downloading multiple amps. DAPM normally reads the initial state of mux registers during probe() but this would mean blocking probe() for several seconds until the firmware has initialized them. To avoid this, the mixer muxes are set SND_SOC_NOPM to prevent DAPM trying to read the register state. Custom get/set callbacks are implemented for ALSA control access, and these can safely block waiting for the firmware download. After the firmware download has completed, the state of the mux registers is known so a work job is queued to call snd_soc_dapm_mux_update_power() on each of the mux widgets. Backport note: This won't apply cleanly to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-11-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 7 +- sound/soc/codecs/cs35l56.c | 172 +++++++++++++++++++++++++++--- sound/soc/codecs/cs35l56.h | 1 + 3 files changed, 163 insertions(+), 17 deletions(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index a812abf90836e..9a70db0fa4188 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -43,10 +43,9 @@ static const struct reg_default cs35l56_reg_defaults[] = { { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 }, { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 }, { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 }, - { CS35L56_ASP1TX1_INPUT, 0x00000018 }, - { CS35L56_ASP1TX2_INPUT, 0x00000019 }, - { CS35L56_ASP1TX3_INPUT, 0x00000020 }, - { CS35L56_ASP1TX4_INPUT, 0x00000028 }, + + /* no defaults for ASP1TX mixer */ + { CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 }, { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 41aa79848b159..1b51650a19fff 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -59,6 +59,135 @@ static int cs35l56_dspwait_put_volsw(struct snd_kcontrol *kcontrol, return snd_soc_put_volsw(kcontrol, ucontrol); } +static const unsigned short cs35l56_asp1_mixer_regs[] = { + CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX2_INPUT, + CS35L56_ASP1TX3_INPUT, CS35L56_ASP1TX4_INPUT, +}; + +static const char * const cs35l56_asp1_mux_control_names[] = { + "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source" +}; + +static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int index = e->shift_l; + unsigned int addr, val; + int ret; + + /* Wait for mux to be initialized */ + cs35l56_wait_dsp_ready(cs35l56); + flush_work(&cs35l56->mux_init_work); + + addr = cs35l56_asp1_mixer_regs[index]; + ret = regmap_read(cs35l56->base.regmap, addr, &val); + if (ret) + return ret; + + val &= CS35L56_ASP_TXn_SRC_MASK; + ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val); + + return 0; +} + +static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int item = ucontrol->value.enumerated.item[0]; + int index = e->shift_l; + unsigned int addr, val; + bool changed; + int ret; + + /* Wait for mux to be initialized */ + cs35l56_wait_dsp_ready(cs35l56); + flush_work(&cs35l56->mux_init_work); + + addr = cs35l56_asp1_mixer_regs[index]; + val = snd_soc_enum_item_to_val(e, item); + + ret = regmap_update_bits_check(cs35l56->base.regmap, addr, + CS35L56_ASP_TXn_SRC_MASK, val, &changed); + if (!ret) + return ret; + + if (changed) + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + + return changed; +} + +static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l56) +{ + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component); + const char *prefix = cs35l56->component->name_prefix; + char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + const char *name; + struct snd_kcontrol *kcontrol; + struct soc_enum *e; + unsigned int val[4]; + int i, item, ret; + + /* + * Resume so we can read the registers from silicon if the regmap + * cache has not yet been populated. + */ + ret = pm_runtime_resume_and_get(cs35l56->base.dev); + if (ret < 0) + return; + + ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, + val, ARRAY_SIZE(val)); + + pm_runtime_mark_last_busy(cs35l56->base.dev); + pm_runtime_put_autosuspend(cs35l56->base.dev); + + if (ret) { + dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret); + return; + } + + snd_soc_card_mutex_lock(dapm->card); + WARN_ON(!dapm->card->instantiated); + + for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) { + name = cs35l56_asp1_mux_control_names[i]; + + if (prefix) { + snprintf(full_name, sizeof(full_name), "%s %s", prefix, name); + name = full_name; + } + + kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); + if (!kcontrol) { + dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); + continue; + } + + e = (struct soc_enum *)kcontrol->private_value; + item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK); + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + } + + snd_soc_card_mutex_unlock(dapm->card); +} + +static void cs35l56_mux_init_work(struct work_struct *work) +{ + struct cs35l56_private *cs35l56 = container_of(work, + struct cs35l56_private, + mux_init_work); + + cs35l56_mark_asp1_mixer_widgets_dirty(cs35l56); +} + static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0); static const struct snd_kcontrol_new cs35l56_controls[] = { @@ -77,40 +206,44 @@ static const struct snd_kcontrol_new cs35l56_controls[] = { }; static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx1_enum, - CS35L56_ASP1TX1_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 0, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx1_mux = - SOC_DAPM_ENUM("ASP1TX1 SRC", cs35l56_asp1tx1_enum); + SOC_DAPM_ENUM_EXT("ASP1TX1 SRC", cs35l56_asp1tx1_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx2_enum, - CS35L56_ASP1TX2_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 1, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx2_mux = - SOC_DAPM_ENUM("ASP1TX2 SRC", cs35l56_asp1tx2_enum); + SOC_DAPM_ENUM_EXT("ASP1TX2 SRC", cs35l56_asp1tx2_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx3_enum, - CS35L56_ASP1TX3_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 2, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx3_mux = - SOC_DAPM_ENUM("ASP1TX3 SRC", cs35l56_asp1tx3_enum); + SOC_DAPM_ENUM_EXT("ASP1TX3 SRC", cs35l56_asp1tx3_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_asp1tx4_enum, - CS35L56_ASP1TX4_INPUT, - 0, CS35L56_ASP_TXn_SRC_MASK, + SND_SOC_NOPM, + 3, 0, cs35l56_tx_input_texts, cs35l56_tx_input_values); static const struct snd_kcontrol_new asp1_tx4_mux = - SOC_DAPM_ENUM("ASP1TX4 SRC", cs35l56_asp1tx4_enum); + SOC_DAPM_ENUM_EXT("ASP1TX4 SRC", cs35l56_asp1tx4_enum, + cs35l56_dspwait_asp1tx_get, cs35l56_dspwait_asp1tx_put); static SOC_VALUE_ENUM_SINGLE_DECL(cs35l56_sdw1tx1_enum, CS35L56_SWIRE_DP3_CH1_INPUT, @@ -785,6 +918,15 @@ static void cs35l56_dsp_work(struct work_struct *work) else cs35l56_patch(cs35l56); + + /* + * Set starting value of ASP1 mux widgets. Updating a mux takes + * the DAPM mutex. Post this to a separate job so that DAPM + * power-up can wait for dsp_work to complete without deadlocking + * on the DAPM mutex. + */ + queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); + pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); } @@ -830,6 +972,7 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); + cancel_work_sync(&cs35l56->mux_init_work); if (cs35l56->dsp.cs_dsp.booted) wm_adsp_power_down(&cs35l56->dsp); @@ -897,8 +1040,10 @@ int cs35l56_system_suspend(struct device *dev) dev_dbg(dev, "system_suspend\n"); - if (cs35l56->component) + if (cs35l56->component) { flush_work(&cs35l56->dsp_work); + cancel_work_sync(&cs35l56->mux_init_work); + } /* * The interrupt line is normally shared, but after we start suspending @@ -1049,6 +1194,7 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) return -ENOMEM; INIT_WORK(&cs35l56->dsp_work, cs35l56_dsp_work); + INIT_WORK(&cs35l56->mux_init_work, cs35l56_mux_init_work); dsp = &cs35l56->dsp; cs35l56_init_cs_dsp(&cs35l56->base, &dsp->cs_dsp); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 8159c3e217d93..dc2fe4c91e67b 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -34,6 +34,7 @@ struct cs35l56_private { struct wm_adsp dsp; /* must be first member */ struct cs35l56_base base; struct work_struct dsp_work; + struct work_struct mux_init_work; struct workqueue_struct *dsp_wq; struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; -- GitLab From f6c967941c5d6fa526fdd64733a8d86bf2bfab31 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:30 +0000 Subject: [PATCH 0505/1405] ASoC: cs35l56: Fix misuse of wm_adsp 'part' string for silicon revision Put the silicon revision and secured flag in the wm_adsp fwf_name string instead of including them in the part string. This changes the format of the firmware name string from cs35l56[s]-rev-misc[-system_name] to cs35l56-rev[-s]-misc[-system_name] No firmware files have been published, so this doesn't cause a compatibility break. Silicon revision and secured flag are included in the firmware filename to pick a firmware compatible with the part. These strings were being added to the part string, but that is a misuse of the string. The correct place for these is the fwf_name string, which is specifically intended to select between multiple firmware files for the same part. Backport note: This won't apply to kernels older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: 608f1b0dbdde ("ASoC: cs35l56: Move DSP part string generation so that it is done only once") Link: https://msgid.link/r/20240129162737.497-12-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 1b51650a19fff..8899c02c6dea3 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -907,6 +907,18 @@ static void cs35l56_dsp_work(struct work_struct *work) pm_runtime_get_sync(cs35l56->base.dev); + /* Populate fw file qualifier with the revision and security state */ + if (!cs35l56->dsp.fwf_name) { + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x%s-dsp1", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : ""); + if (!cs35l56->dsp.fwf_name) + goto err; + } + + dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n", + cs35l56->dsp.fwf_name, cs35l56->dsp.system_name); + /* * When the device is running in secure mode the firmware files can * only contain insecure tunings and therefore we do not need to @@ -926,7 +938,7 @@ static void cs35l56_dsp_work(struct work_struct *work) * on the DAPM mutex. */ queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); - +err: pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); } @@ -979,6 +991,9 @@ static void cs35l56_component_remove(struct snd_soc_component *component) wm_adsp2_component_remove(&cs35l56->dsp, component); + kfree(cs35l56->dsp.fwf_name); + cs35l56->dsp.fwf_name = NULL; + cs35l56->component = NULL; } @@ -1330,12 +1345,6 @@ int cs35l56_init(struct cs35l56_private *cs35l56) if (ret) return ret; - /* Populate the DSP information with the revision and security state */ - cs35l56->dsp.part = devm_kasprintf(cs35l56->base.dev, GFP_KERNEL, "cs35l56%s-%02x", - cs35l56->base.secured ? "s" : "", cs35l56->base.rev); - if (!cs35l56->dsp.part) - return -ENOMEM; - if (!cs35l56->base.reset_gpio) { dev_dbg(cs35l56->base.dev, "No reset gpio: using soft reset\n"); cs35l56->soft_resetting = true; -- GitLab From f4ef5149953f2fc04907ca5b34db3df667dcddef Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:31 +0000 Subject: [PATCH 0506/1405] ASoC: cs35l56: Firmware file must match the version of preloaded firmware Check during initialization whether the firmware is already patched. If so, include the firmware version in the wm_adsp fwf_name string. If the firmware has already been patched by the BIOS the driver can only replace it if it has control of hard RESET. If the driver cannot replace the firmware, it can still load a wmfw (for ALSA control definitions) and/or a bin (for additional tunings). But these must match the version of firmware that is running on the CS35L56. The firmware is pre-patched if FIRMWARE_MISSING == 0. Including the firmware version in the fwf_name string will qualify the firmware file name: Normal (unpatched or replaceable firmware): cs35l56-rev-dsp1-misc[-system_name].[wmfw|bin] Preloaded firmware: cs35l56-rev[-s]-VVVVVV-dsp1-misc[-system_name].[wmfw|bin] Where: [-s] is an optional -s added into the name for a secured CS35L56 VVVVVV is the 24-bit firmware version in hexadecimal. Signed-off-by: Richard Fitzgerald Fixes: 608f1b0dbdde ("ASoC: cs35l56: Move DSP part string generation so that it is done only once") Link: https://msgid.link/r/20240129162737.497-13-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 3 ++ sound/soc/codecs/cs35l56-shared.c | 36 +++++++++++++++++++-- sound/soc/codecs/cs35l56.c | 52 +++++++++++++++++-------------- 3 files changed, 65 insertions(+), 26 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 4db36c893d9d2..5d6aefc41e64a 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -75,6 +75,7 @@ #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_0 0x25E2040 #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1 0x25E2044 #define CS35L56_DSP1_XMEM_UNPACKED24_0 0x2800000 +#define CS35L56_DSP1_FW_VER 0x2800010 #define CS35L56_DSP1_HALO_STATE_A1 0x2801E58 #define CS35L56_DSP1_HALO_STATE 0x28021E0 #define CS35L56_DSP1_PM_CUR_STATE_A1 0x2804000 @@ -285,6 +286,8 @@ int cs35l56_is_fw_reload_needed(struct cs35l56_base *cs35l56_base); int cs35l56_runtime_suspend_common(struct cs35l56_base *cs35l56_base); int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_soundwire); void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_dsp); +int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, + bool *fw_missing, unsigned int *fw_version); int cs35l56_hw_init(struct cs35l56_base *cs35l56_base); int cs35l56_get_bclk_freq_id(unsigned int freq); void cs35l56_fill_supply_names(struct regulator_bulk_data *data); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 9a70db0fa4188..33835535ef843 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -628,10 +628,35 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds } EXPORT_SYMBOL_NS_GPL(cs35l56_init_cs_dsp, SND_SOC_CS35L56_SHARED); +int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, + bool *fw_missing, unsigned int *fw_version) +{ + unsigned int prot_status; + int ret; + + ret = regmap_read(cs35l56_base->regmap, CS35L56_PROTECTION_STATUS, &prot_status); + if (ret) { + dev_err(cs35l56_base->dev, "Get PROTECTION_STATUS failed: %d\n", ret); + return ret; + } + + *fw_missing = !!(prot_status & CS35L56_FIRMWARE_MISSING); + + ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP1_FW_VER, fw_version); + if (ret) { + dev_err(cs35l56_base->dev, "Get FW VER failed: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_read_prot_status, SND_SOC_CS35L56_SHARED); + int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) { int ret; - unsigned int devid, revid, otpid, secured; + unsigned int devid, revid, otpid, secured, fw_ver; + bool fw_missing; /* * When the system is not using a reset_gpio ensure the device is @@ -690,8 +715,13 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) return ret; } - dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d\n", - cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid); + ret = cs35l56_read_prot_status(cs35l56_base, &fw_missing, &fw_ver); + if (ret) + return ret; + + dev_info(cs35l56_base->dev, "Cirrus Logic CS35L56%s Rev %02X OTP%d fw:%d.%d.%d (patched=%u)\n", + cs35l56_base->secured ? "s" : "", cs35l56_base->rev, otpid, + fw_ver >> 16, (fw_ver >> 8) & 0xff, fw_ver & 0xff, !fw_missing); /* Wake source and *_BLOCKED interrupts default to unmasked, so mask them */ regmap_write(cs35l56_base->regmap, CS35L56_IRQ1_MASK_20, 0xffffffff); diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 8899c02c6dea3..597677422547c 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -804,7 +804,7 @@ static struct snd_soc_dai_driver cs35l56_dai[] = { } }; -static void cs35l56_secure_patch(struct cs35l56_private *cs35l56) +static void cs35l56_reinit_patch(struct cs35l56_private *cs35l56) { int ret; @@ -816,19 +816,10 @@ static void cs35l56_secure_patch(struct cs35l56_private *cs35l56) cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT); } -static void cs35l56_patch(struct cs35l56_private *cs35l56) +static void cs35l56_patch(struct cs35l56_private *cs35l56, bool firmware_missing) { - unsigned int firmware_missing; int ret; - ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing); - if (ret) { - dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret); - return; - } - - firmware_missing &= CS35L56_FIRMWARE_MISSING; - /* * Disable SoundWire interrupts to prevent race with IRQ work. * Setting sdw_irq_no_unmask prevents the handler re-enabling @@ -901,34 +892,49 @@ static void cs35l56_dsp_work(struct work_struct *work) struct cs35l56_private *cs35l56 = container_of(work, struct cs35l56_private, dsp_work); + unsigned int firmware_version; + bool firmware_missing; + int ret; if (!cs35l56->base.init_done) return; pm_runtime_get_sync(cs35l56->base.dev); + ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &firmware_version); + if (ret) + goto err; + /* Populate fw file qualifier with the revision and security state */ - if (!cs35l56->dsp.fwf_name) { - cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x%s-dsp1", + kfree(cs35l56->dsp.fwf_name); + if (firmware_missing) { + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, "%02x-dsp1", cs35l56->base.rev); + } else { + /* Firmware files must match the running firmware version */ + cs35l56->dsp.fwf_name = kasprintf(GFP_KERNEL, + "%02x%s-%06x-dsp1", cs35l56->base.rev, - cs35l56->base.secured ? "-s" : ""); - if (!cs35l56->dsp.fwf_name) - goto err; + cs35l56->base.secured ? "-s" : "", + firmware_version); } + if (!cs35l56->dsp.fwf_name) + goto err; + dev_dbg(cs35l56->base.dev, "DSP fwf name: '%s' system name: '%s'\n", cs35l56->dsp.fwf_name, cs35l56->dsp.system_name); /* - * When the device is running in secure mode the firmware files can - * only contain insecure tunings and therefore we do not need to - * shutdown the firmware to apply them and can use the lower cost - * reinit sequence instead. + * The firmware cannot be patched if it is already running from + * patch RAM. In this case the firmware files are versioned to + * match the running firmware version and will only contain + * tunings. We do not need to shutdown the firmware to apply + * tunings so can use the lower cost reinit sequence instead. */ - if (cs35l56->base.secured) - cs35l56_secure_patch(cs35l56); + if (!firmware_missing) + cs35l56_reinit_patch(cs35l56); else - cs35l56_patch(cs35l56); + cs35l56_patch(cs35l56, firmware_missing); /* -- GitLab From 245eeff18d7a37693815250ae15979ce98c3d190 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:32 +0000 Subject: [PATCH 0507/1405] ASoC: cs35l56: Load tunings for the correct speaker models If the "spk-id-gpios" property is present it points to GPIOs whose value must be used to select the correct bin file to match the speakers. Some manufacturers use multiple sources of speakers, which need different tunings for best performance. On these models the type of speaker fitted is indicated by the values of one or more GPIOs. The number formed by the GPIOs identifies the tuning required. The speaker ID must be used in combination with the subsystem ID (either from PCI SSID or cirrus,firmware-uid property), because the GPIOs can only indicate variants of a specific model. Signed-off-by: Richard Fitzgerald Fixes: 1a1c3d794ef6 ("ASoC: cs35l56: Use PCI SSID as the firmware UID") Link: https://msgid.link/r/20240129162737.497-14-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 1 + sound/soc/codecs/cs35l56-shared.c | 36 +++++++++++++++++++++++++++++++ sound/soc/codecs/cs35l56.c | 32 ++++++++++++++++++++++----- sound/soc/codecs/cs35l56.h | 1 + 4 files changed, 65 insertions(+), 5 deletions(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 5d6aefc41e64a..23da6298ab377 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -289,6 +289,7 @@ void cs35l56_init_cs_dsp(struct cs35l56_base *cs35l56_base, struct cs_dsp *cs_ds int cs35l56_read_prot_status(struct cs35l56_base *cs35l56_base, bool *fw_missing, unsigned int *fw_version); int cs35l56_hw_init(struct cs35l56_base *cs35l56_base); +int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base); int cs35l56_get_bclk_freq_id(unsigned int freq); void cs35l56_fill_supply_names(struct regulator_bulk_data *data); diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 33835535ef843..02fba4bc0a14f 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -5,6 +5,7 @@ // Copyright (C) 2023 Cirrus Logic, Inc. and // Cirrus Logic International Semiconductor Ltd. +#include #include #include #include @@ -736,6 +737,41 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base) } EXPORT_SYMBOL_NS_GPL(cs35l56_hw_init, SND_SOC_CS35L56_SHARED); +int cs35l56_get_speaker_id(struct cs35l56_base *cs35l56_base) +{ + struct gpio_descs *descs; + int speaker_id; + int i, ret; + + /* Read the speaker type qualifier from the motherboard GPIOs */ + descs = gpiod_get_array_optional(cs35l56_base->dev, "spk-id", GPIOD_IN); + if (!descs) { + return -ENOENT; + } else if (IS_ERR(descs)) { + ret = PTR_ERR(descs); + return dev_err_probe(cs35l56_base->dev, ret, "Failed to get spk-id-gpios\n"); + } + + speaker_id = 0; + for (i = 0; i < descs->ndescs; i++) { + ret = gpiod_get_value_cansleep(descs->desc[i]); + if (ret < 0) { + dev_err_probe(cs35l56_base->dev, ret, "Failed to read spk-id[%d]\n", i); + goto err; + } + + speaker_id |= (ret << i); + } + + dev_dbg(cs35l56_base->dev, "Speaker ID = %d\n", speaker_id); + ret = speaker_id; +err: + gpiod_put_array(descs); + + return ret; +} +EXPORT_SYMBOL_NS_GPL(cs35l56_get_speaker_id, SND_SOC_CS35L56_SHARED); + static const u32 cs35l56_bclk_valid_for_pll_freq_table[] = { [0x0C] = 128000, [0x0F] = 256000, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 597677422547c..c23e29da4cfb9 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -959,10 +959,19 @@ static int cs35l56_component_probe(struct snd_soc_component *component) if (!cs35l56->dsp.system_name && (snd_soc_card_get_pci_ssid(component->card, &vendor, &device) == 0)) { - cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, - GFP_KERNEL, - "%04x%04x", - vendor, device); + /* Append a speaker qualifier if there is a speaker ID */ + if (cs35l56->speaker_id >= 0) { + cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, + GFP_KERNEL, + "%04x%04x-spkid%d", + vendor, device, + cs35l56->speaker_id); + } else { + cs35l56->dsp.system_name = devm_kasprintf(cs35l56->base.dev, + GFP_KERNEL, + "%04x%04x", + vendor, device); + } if (!cs35l56->dsp.system_name) return -ENOMEM; } @@ -1245,7 +1254,13 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56) if (ret < 0) return 0; - cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL); + /* Append a speaker qualifier if there is a speaker ID */ + if (cs35l56->speaker_id >= 0) + cs35l56->dsp.system_name = devm_kasprintf(dev, GFP_KERNEL, "%s-spkid%d", + prop, cs35l56->speaker_id); + else + cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL); + if (cs35l56->dsp.system_name == NULL) return -ENOMEM; @@ -1260,6 +1275,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) init_completion(&cs35l56->init_completion); mutex_init(&cs35l56->base.irq_lock); + cs35l56->speaker_id = -ENOENT; dev_set_drvdata(cs35l56->base.dev, cs35l56); @@ -1296,6 +1312,12 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) gpiod_set_value_cansleep(cs35l56->base.reset_gpio, 1); } + ret = cs35l56_get_speaker_id(&cs35l56->base); + if ((ret < 0) && (ret != -ENOENT)) + goto err; + + cs35l56->speaker_id = ret; + ret = cs35l56_get_firmware_uid(cs35l56); if (ret != 0) goto err; diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index dc2fe4c91e67b..596b141e3f961 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -45,6 +45,7 @@ struct cs35l56_private { bool sdw_attached; struct completion init_completion; + int speaker_id; u32 rx_mask; u32 tx_mask; u8 asp_slot_width; -- GitLab From 9e92b77ceb6f362eb2e7995dad6c7f9863053d97 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:33 +0000 Subject: [PATCH 0508/1405] ASoC: cs35l56: Allow more time for firmware to boot The original 50ms timeout for firmware boot is not long enough for worst-case time to reboot after a firmware download. Increase the timeout to 250ms. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://msgid.link/r/20240129162737.497-15-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 23da6298ab377..b24716ab27504 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -242,7 +242,7 @@ #define CS35L56_CONTROL_PORT_READY_US 2200 #define CS35L56_HALO_STATE_POLL_US 1000 -#define CS35L56_HALO_STATE_TIMEOUT_US 50000 +#define CS35L56_HALO_STATE_TIMEOUT_US 250000 #define CS35L56_RESET_PULSE_MIN_US 1100 #define CS35L56_WAKE_HOLD_TIME_US 1000 -- GitLab From 77c60722ded7d6739805e045e9648cda82dde5ed Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:34 +0000 Subject: [PATCH 0509/1405] ALSA: hda: cs35l56: Fix order of searching for firmware files Check for the cases of system-specific bin file without a wmfw before falling back to looking for a generic wmfw. All system-specific options should be tried before falling back to loading a generic wmfw/bin. With the original code, the presence of a fallback generic wmfw on the filesystem would prevent using a system-specific tuning with a ROM firmware. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-16-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index f22bcb104a4eb..7ba7234d8d9c0 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -483,6 +483,20 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, NULL, "bin"); return; } + + /* + * Check for system-specific bin files without wmfw before + * falling back to generic firmware + */ + if (amp_name) + cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, + cirrus_dir, system_name, amp_name, "bin"); + if (!*coeff_firmware) + cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, + cirrus_dir, system_name, NULL, "bin"); + + if (*coeff_firmware) + return; } ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, @@ -493,16 +507,6 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, return; } - /* When a firmware file is not found must still search for the coeff files */ - if (system_name) { - if (amp_name) - cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); - if (!*coeff_firmware) - cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, NULL, "bin"); - } - if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, cirrus_dir, NULL, NULL, "bin"); -- GitLab From e82bc517c6ef5d5c04b845420406e694c31bdb8a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:35 +0000 Subject: [PATCH 0510/1405] ALSA: hda: cs35l56: Fix filename string field layout Change the filename field layout to: cs35l56-rev[-s]-dsp1-misc[-sub].[wmfw|bin] This is to keep the same firmware file naming scheme as the CS35L56 ASoC driver. This is not a compatibility break because no firmware files have been published. The original field layout matched the ASoC driver, but the way the ASoC driver used the wm_adsp driver config to form this filename was bugged. Fixing the ASoC driver to use the correct wm_adsp config strings means that the 's' flag (to indicate a secured part) has to move to somewhere after the first '-'. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-17-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 7ba7234d8d9c0..081479f65fe7c 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -405,16 +405,19 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, int ret = 0; if (system_name && amp_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s-%s.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s-%s.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", system_name, amp_name, filetype); else if (system_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc-%s.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%scs35l56%s-%02x-dsp1-misc.%s", dir, - cs35l56->base.secured ? "s" : "", cs35l56->base.rev, + *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc.%s", dir, + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", filetype); if (!*filename) -- GitLab From 6f8ad0480d82245961dae4d3280908611633872d Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:36 +0000 Subject: [PATCH 0511/1405] ALSA: hda: cs35l56: Firmware file must match the version of preloaded firmware Check whether the firmware is already patched. If so, include the firmware version in the firmware file name. If the firmware has already been patched by the BIOS the driver can only replace it if it has control of hard RESET. If the driver cannot replace the firmware, it can still load a wmfw (for ALSA control definitions) and/or a bin (for additional tunings). But these must match the version of firmware that is running on the CS35L56. The firmware is pre-patched if either: - FIRMWARE_MISSING == 0, or - it is a secured CS35L56 (which implies that is was already patched), cs35l56_hw_init() will set preloaded_fw_ver to the (non-zero) firmware version if either of these conditions is true. Normal (unpatched or replaceable firmware): cs35l56-rev-dsp1-misc[-system_name].[wmfw|bin] Preloaded firmware: cs35l56-rev[-s]-VVVVVV-dsp1-misc[-system_name].[wmfw|bin] Where: [-s] is an optional -s added into the name for a secured CS35L56 VVVVVV is the 24-bit firmware version in hexadecimal. Backport note: This won't apply to kernel versions older than v6.6. Signed-off-by: Richard Fitzgerald Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Link: https://msgid.link/r/20240129162737.497-18-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 93 +++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 081479f65fe7c..32736d3e45bad 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -397,7 +397,7 @@ static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, const struct firmware **firmware, char **filename, - const char *dir, const char *system_name, + const char *base_name, const char *system_name, const char *amp_name, const char *filetype) { @@ -405,20 +405,13 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, int ret = 0; if (system_name && amp_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s-%s.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", + *filename = kasprintf(GFP_KERNEL, "%s-%s-%s.%s", base_name, system_name, amp_name, filetype); else if (system_name) - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc-%s.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", + *filename = kasprintf(GFP_KERNEL, "%s-%s.%s", base_name, system_name, filetype); else - *filename = kasprintf(GFP_KERNEL, "%scs35l56-%02x%s-dsp1-misc.%s", dir, - cs35l56->base.rev, - cs35l56->base.secured ? "-s" : "", - filetype); + *filename = kasprintf(GFP_KERNEL, "%s.%s", base_name, filetype); if (!*filename) return -ENOMEM; @@ -451,8 +444,8 @@ static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, return 0; } -static const char cirrus_dir[] = "cirrus/"; static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, + unsigned int preloaded_fw_ver, const struct firmware **wmfw_firmware, char **wmfw_filename, const struct firmware **coeff_firmware, @@ -460,29 +453,43 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, { const char *system_name = cs35l56->system_name; const char *amp_name = cs35l56->amp_name; + char base_name[37]; int ret; + if (preloaded_fw_ver) { + snprintf(base_name, sizeof(base_name), + "cirrus/cs35l56-%02x%s-%06x-dsp1-misc", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : "", + preloaded_fw_ver & 0xffffff); + } else { + snprintf(base_name, sizeof(base_name), + "cirrus/cs35l56-%02x%s-dsp1-misc", + cs35l56->base.rev, + cs35l56->base.secured ? "-s" : ""); + } + if (system_name && amp_name) { if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, system_name, amp_name, "wmfw")) { + base_name, system_name, amp_name, "wmfw")) { cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); + base_name, system_name, amp_name, "bin"); return; } } if (system_name) { if (!cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, system_name, NULL, "wmfw")) { + base_name, system_name, NULL, "wmfw")) { if (amp_name) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, + base_name, system_name, amp_name, "bin"); if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, + base_name, system_name, NULL, "bin"); return; } @@ -493,26 +500,26 @@ static void cs35l56_hda_request_firmware_files(struct cs35l56_hda *cs35l56, */ if (amp_name) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, amp_name, "bin"); + base_name, system_name, amp_name, "bin"); if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, system_name, NULL, "bin"); + base_name, system_name, NULL, "bin"); if (*coeff_firmware) return; } ret = cs35l56_hda_request_firmware_file(cs35l56, wmfw_firmware, wmfw_filename, - cirrus_dir, NULL, NULL, "wmfw"); + base_name, NULL, NULL, "wmfw"); if (!ret) { cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); + base_name, NULL, NULL, "bin"); return; } if (!*coeff_firmware) cs35l56_hda_request_firmware_file(cs35l56, coeff_firmware, coeff_filename, - cirrus_dir, NULL, NULL, "bin"); + base_name, NULL, NULL, "bin"); } static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmware, @@ -546,7 +553,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) const struct firmware *wmfw_firmware = NULL; char *coeff_filename = NULL; char *wmfw_filename = NULL; - unsigned int firmware_missing; + unsigned int preloaded_fw_ver; + bool firmware_missing; int ret = 0; /* Prepare for a new DSP power-up */ @@ -557,24 +565,21 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) pm_runtime_get_sync(cs35l56->base.dev); - ret = regmap_read(cs35l56->base.regmap, CS35L56_PROTECTION_STATUS, &firmware_missing); - if (ret) { - dev_err(cs35l56->base.dev, "Failed to read PROTECTION_STATUS: %d\n", ret); + /* + * The firmware can only be upgraded if it is currently running + * from the built-in ROM. If not, the wmfw/bin must be for the + * version of firmware that is running on the chip. + */ + ret = cs35l56_read_prot_status(&cs35l56->base, &firmware_missing, &preloaded_fw_ver); + if (ret) goto err_pm_put; - } - firmware_missing &= CS35L56_FIRMWARE_MISSING; + if (firmware_missing) + preloaded_fw_ver = 0; - /* - * Firmware can only be downloaded if the CS35L56 is secured or is - * running from the built-in ROM. If it is secured the BIOS will have - * downloaded firmware, and the wmfw/bin files will only contain - * tunings that are safe to download with the firmware running. - */ - if (cs35l56->base.secured || firmware_missing) { - cs35l56_hda_request_firmware_files(cs35l56, &wmfw_firmware, &wmfw_filename, - &coeff_firmware, &coeff_filename); - } + cs35l56_hda_request_firmware_files(cs35l56, preloaded_fw_ver, + &wmfw_firmware, &wmfw_filename, + &coeff_firmware, &coeff_filename); /* * If the BIOS didn't patch the firmware a bin file is mandatory to @@ -589,12 +594,12 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) mutex_lock(&cs35l56->base.irq_lock); /* - * When the device is running in secure mode the firmware files can - * only contain insecure tunings and therefore we do not need to - * shutdown the firmware to apply them and can use the lower cost - * reinit sequence instead. + * If the firmware hasn't been patched it must be shutdown before + * doing a full patch and reset afterwards. If it is already + * running a patched version the firmware files only contain + * tunings and we can use the lower cost reinit sequence instead. */ - if (!cs35l56->base.secured && (wmfw_firmware || coeff_firmware)) { + if (firmware_missing && (wmfw_firmware || coeff_firmware)) { ret = cs35l56_firmware_shutdown(&cs35l56->base); if (ret) goto err; @@ -613,7 +618,7 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) if (coeff_filename) dev_dbg(cs35l56->base.dev, "Loaded Coefficients: %s\n", coeff_filename); - if (cs35l56->base.secured) { + if (!firmware_missing) { ret = cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_REINIT); if (ret) goto err_powered_up; -- GitLab From 28876c1ae8b8cd1dacef50bd6c0555824774f0d2 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 29 Jan 2024 16:27:37 +0000 Subject: [PATCH 0512/1405] ALSA: hda: cs35l56: Remove unused test stub function Remove an unused stub function that calls a non-existant function. This function was accidentally added as part of commit 2144833e7b41 ("ALSA: hda: cirrus_scodec: Add KUnit test"). It was a relic of an earlier version of the test that should have been removed. Signed-off-by: Richard Fitzgerald Fixes: 2144833e7b41 ("ALSA: hda: cirrus_scodec: Add KUnit test") Link: https://msgid.link/r/20240129162737.497-19-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/pci/hda/cs35l56_hda.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 32736d3e45bad..75a14ba54fcd1 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -1063,16 +1063,6 @@ const struct dev_pm_ops cs35l56_hda_pm_ops = { }; EXPORT_SYMBOL_NS_GPL(cs35l56_hda_pm_ops, SND_HDA_SCODEC_CS35L56); -#if IS_ENABLED(CONFIG_SND_HDA_SCODEC_CS35L56_KUNIT_TEST) -/* Hooks to export static function to KUnit test */ - -int cs35l56_hda_test_hook_get_speaker_id(struct device *dev, int amp_index, int num_amps) -{ - return cs35l56_hda_get_speaker_id(dev, amp_index, num_amps); -} -EXPORT_SYMBOL_NS_GPL(cs35l56_hda_test_hook_get_speaker_id, SND_HDA_SCODEC_CS35L56); -#endif - MODULE_DESCRIPTION("CS35L56 HDA Driver"); MODULE_IMPORT_NS(SND_HDA_CIRRUS_SCODEC); MODULE_IMPORT_NS(SND_HDA_CS_DSP_CONTROLS); -- GitLab From c7de2d9bb68a5fc71c25ff96705a80a76c8436eb Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Thu, 1 Feb 2024 09:21:14 -0300 Subject: [PATCH 0513/1405] ALSA: hda/realtek: Enable headset mic on Vaio VJFE-ADL Vaio VJFE-ADL is equipped with ALC269VC, and it needs ALC298_FIXUP_SPK_VOLUME quirk to make its headset mic work. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20240201122114.30080-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f98520c0bed9f..6994c4c5073cb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10331,6 +10331,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), + SND_PCI_QUIRK(0x2782, 0x1707, "Vaio VJFE-ADL", ALC298_FIXUP_SPK_VOLUME), SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), -- GitLab From 72bd80252feeb3bef8724230ee15d9f7ab541c6e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Feb 2024 06:42:36 -0700 Subject: [PATCH 0514/1405] io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers If we use IORING_OP_RECV with provided buffers and pass in '0' as the length of the request, the length is retrieved from the selected buffer. If MSG_WAITALL is also set and we get a short receive, then we may hit the retry path which decrements sr->len and increments the buffer for a retry. However, the length is still zero at this point, which means that sr->len now becomes huge and import_ubuf() will cap it to MAX_RW_COUNT and subsequently return -EFAULT for the range as a whole. Fix this by always assigning sr->len once the buffer has been selected. Cc: stable@vger.kernel.org Fixes: 7ba89d2af17a ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly") Signed-off-by: Jens Axboe --- io_uring/net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/net.c b/io_uring/net.c index a12ff69e68434..43bc9a5f96f9d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -923,6 +923,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) if (!buf) return -ENOBUFS; sr->buf = buf; + sr->len = len; } ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter); -- GitLab From 6813cdca4ab94a238f8eb0cef3d3f3fcbdfb0ee0 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 31 Jan 2024 10:19:20 +0800 Subject: [PATCH 0515/1405] drm/amdgpu/pm: Use inline function for IP version check Use existing inline function for IP version check. Signed-off-by: Ma Jun Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3230701d0d381..a9954ffc02c56 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2944,7 +2944,7 @@ static bool smu_v13_0_0_wbrf_support_check(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 0): return smu->smc_fw_version >= 0x004e6300; case IP_VERSION(13, 0, 10): -- GitLab From 04f647c8e456fcfabe9c252a4dcaee03b586fa4f Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 30 Jan 2024 17:36:10 +0530 Subject: [PATCH 0516/1405] octeontx2-pf: Remove xdp queues on program detach XDP queues are created/destroyed when a XDP program is attached/detached. In current driver xdp_queues are not getting destroyed on program exit due to incorrect xdp_queue and tot_tx_queue count values. This patch fixes the issue by setting tot_tx_queue and xdp_queue count to correct values. It also fixes xdp.data_hard_start address. Fixes: 06059a1a9a4a ("octeontx2-pf: Add XDP support to netdev PF") Signed-off-by: Geetha sowjanya Link: https://lore.kernel.org/r/20240130120610.16673-1-gakula@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 1 - drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 7 +++---- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 2928898c7f8df..7f786de610148 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev, pfvf->hw.tx_queues = channel->tx_count; if (pfvf->xdp_prog) pfvf->hw.xdp_queues = channel->rx_count; - pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a57455aebff6f..e5fe67e738655 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev) /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ + pf->hw.non_qos_queues = pf->hw.tx_queues + pf->hw.xdp_queues; pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, pf->hw.tc_tx_queues); @@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.non_qos_queues += pf->hw.xdp_queues; - if (if_up) otx2_open(pf->netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 4d519ea833b2c..f828d32737af0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush) { - unsigned char *hard_start, *data; + unsigned char *hard_start; int qidx = cq->cq_idx; struct xdp_buff xdp; struct page *page; @@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq); - data = (unsigned char *)phys_to_virt(pa); - hard_start = page_address(page); - xdp_prepare_buff(&xdp, hard_start, data - hard_start, + hard_start = (unsigned char *)phys_to_virt(pa); + xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM, cqe->sg.seg_size, false); act = bpf_prog_run_xdp(prog, &xdp); -- GitLab From eaa1b01fe709d6a236a9cec74813e0400601fd23 Mon Sep 17 00:00:00 2001 From: Alexander Tsoy Date: Thu, 1 Feb 2024 14:53:08 +0300 Subject: [PATCH 0517/1405] ALSA: usb-audio: Ignore clock selector errors for single connection For devices with multiple clock sources connected to a selector, we need to check what a clock selector control request has returned. This is needed to ensure that a requested clock source is indeed selected and for autoclock feature to work. For devices with single clock source connected, if we get an error there is nothing else we can do about it. We can't skip clock selector setup as it is required by some devices. So lets just ignore error in this case. This should fix various buggy Mackie devices: [ 649.109785] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.111946] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) [ 649.113822] usb 1-1.3: parse_audio_format_rates_v2v3(): unable to find clock source (clock -32) There is also interesting info from the Windows documentation [1] (this is probably why manufacturers dont't even test this feature): "The USB Audio 2.0 driver doesn't support clock selection. The driver uses the Clock Source Entity, which is selected by default and never issues a Clock Selector Control SET CUR request." Link: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/usb-2-0-audio-drivers [1] Link: https://bugzilla.kernel.org/show_bug.cgi?id=217314 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218175 Link: https://bugzilla.kernel.org/show_bug.cgi?id=218342 Signed-off-by: Alexander Tsoy Link: https://lore.kernel.org/r/20240201115308.17838-1-alexander@tsoy.me Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index a8204c6d6fac1..60fcb872a80b6 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -347,8 +347,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, !writeable) return ret; err = uac_clock_selector_set_val(chip, entity_id, cur); - if (err < 0) + if (err < 0) { + if (pins == 1) { + usb_audio_dbg(chip, + "%s(): selector returned an error, " + "assuming a firmware bug, id %d, ret %d\n", + __func__, clock_id, err); + return ret; + } return err; + } } if (!validate || ret > 0 || !chip->autoclock) -- GitLab From 49304c2b93e4f7468b51ef717cbe637981397115 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 23:32:27 -0500 Subject: [PATCH 0518/1405] tracefs: dentry lookup crapectomy The dentry lookup for eventfs files was very broken, and had lots of signs of the old situation where the filesystem names were all created statically in the dentry tree, rather than being looked up dynamically based on the eventfs data structures. You could see it in the naming - how it claimed to "create" dentries rather than just look up the dentries that were given it. You could see it in various nonsensical and very incorrect operations, like using "simple_lookup()" on the dentries that were passed in, which only results in those dentries becoming negative dentries. Which meant that any other lookup would possibly return ENOENT if it saw that negative dentry before the data was then later filled in. You could see it in the immense amount of nonsensical code that didn't actually just do lookups. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131233227.73db55e1@gandalf.local.home Cc: stable@vger.kernel.org Cc: Al Viro Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Ajay Kaher Cc: Mark Rutland Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 275 +++++++-------------------------------- fs/tracefs/inode.c | 69 ---------- fs/tracefs/internal.h | 3 - 3 files changed, 50 insertions(+), 297 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index e9819d719d2ac..04c2ab90f93e6 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -230,7 +230,6 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { struct eventfs_inode *ei; - mutex_lock(&eventfs_mutex); do { // The parent is stable because we do not do renames dentry = dentry->d_parent; @@ -247,7 +246,6 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) } // Walk upwards until you find the events inode } while (!ei->is_events); - mutex_unlock(&eventfs_mutex); update_top_events_attr(ei, dentry->d_sb); @@ -280,11 +278,10 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, } /** - * create_file - create a file in the tracefs filesystem - * @name: the name of the file to create. + * lookup_file - look up a file in the tracefs filesystem + * @dentry: the dentry to look up * @mode: the permission that the file should have. * @attr: saved attributes changed by user - * @parent: parent dentry for this file. * @data: something that the caller will want to get to later on. * @fop: struct file_operations that should be used for this file. * @@ -292,13 +289,13 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, * directory. The inode.i_private pointer will point to @data in the open() * call. */ -static struct dentry *create_file(const char *name, umode_t mode, +static struct dentry *lookup_file(struct dentry *dentry, + umode_t mode, struct eventfs_attr *attr, - struct dentry *parent, void *data, + void *data, const struct file_operations *fop) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; if (!(mode & S_IFMT)) @@ -307,15 +304,9 @@ static struct dentry *create_file(const char *name, umode_t mode, if (WARN_ON_ONCE(!S_ISREG(mode))) return NULL; - WARN_ON_ONCE(!parent); - dentry = eventfs_start_creating(name, parent); - - if (IS_ERR(dentry)) - return dentry; - inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, attr, mode); @@ -329,32 +320,29 @@ static struct dentry *create_file(const char *name, umode_t mode, ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; - d_instantiate(dentry, inode); + + d_add(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + return dentry; }; /** - * create_dir - create a dir in the tracefs filesystem + * lookup_dir_entry - look up a dir in the tracefs filesystem + * @dentry: the directory to look up * @ei: the eventfs_inode that represents the directory to create - * @parent: parent dentry for this file. * - * This function will create a dentry for a directory represented by + * This function will look up a dentry for a directory represented by * a eventfs_inode. */ -static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) +static struct dentry *lookup_dir_entry(struct dentry *dentry, + struct eventfs_inode *pei, struct eventfs_inode *ei) { struct tracefs_inode *ti; - struct dentry *dentry; struct inode *inode; - dentry = eventfs_start_creating(ei->name, parent); - if (IS_ERR(dentry)) - return dentry; - inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return eventfs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); /* If the user updated the directory's attributes, use them */ update_inode_attr(dentry, inode, &ei->attr, @@ -371,11 +359,14 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent /* Only directories have ti->private set to an ei, not files */ ti->private = ei; + dentry->d_fsdata = ei; + ei->dentry = dentry; // Remove me! + inc_nlink(inode); - d_instantiate(dentry, inode); + d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return eventfs_end_creating(dentry); + return dentry; } static void free_ei(struct eventfs_inode *ei) @@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) } /** - * create_file_dentry - create a dentry for a file of an eventfs_inode + * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @ei: the eventfs_inode that the file will be created under * @idx: the index into the d_children[] of the @ei * @parent: The parent dentry of the created file. @@ -438,157 +429,21 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) * address located at @e_dentry. */ static struct dentry * -create_file_dentry(struct eventfs_inode *ei, int idx, - struct dentry *parent, const char *name, umode_t mode, void *data, +lookup_file_dentry(struct dentry *dentry, + struct eventfs_inode *ei, int idx, + umode_t mode, void *data, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; struct dentry **e_dentry = &ei->d_children[idx]; - struct dentry *dentry; - - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - mutex_lock(&eventfs_mutex); - if (ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - /* If the e_dentry already has a dentry, use it */ - if (*e_dentry) { - dget(*e_dentry); - mutex_unlock(&eventfs_mutex); - return *e_dentry; - } - - /* ei->entry_attrs are protected by SRCU */ if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; - mutex_unlock(&eventfs_mutex); - - dentry = create_file(name, mode, attr, parent, data, fops); + dentry->d_fsdata = ei; // NOTE: ei of _parent_ + lookup_file(dentry, mode, attr, data, fops); - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry)) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. - * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed, don't return it. If e_dentry is NULL - * it means it was already freed. - */ - if (ei->is_freed) { - dentry = NULL; - } else { - dentry = *e_dentry; - dget(dentry); - } - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!*e_dentry && !ei->is_freed) { - *e_dentry = dentry; - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); - - return dentry; -} - -/** - * eventfs_post_create_dir - post create dir routine - * @ei: eventfs_inode of recently created dir - * - * Map the meta-data of files within an eventfs dir to their parent dentry - */ -static void eventfs_post_create_dir(struct eventfs_inode *ei) -{ - struct eventfs_inode *ei_child; - - lockdep_assert_held(&eventfs_mutex); - - /* srcu lock already held */ - /* fill parent-child relation */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - ei_child->d_parent = ei->dentry; - } -} - -/** - * create_dir_dentry - Create a directory dentry for the eventfs_inode - * @pei: The eventfs_inode parent of ei. - * @ei: The eventfs_inode to create the directory for - * @parent: The dentry of the parent of this directory - * - * This creates and attaches a directory dentry to the eventfs_inode @ei. - */ -static struct dentry * -create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, - struct dentry *parent) -{ - struct dentry *dentry = NULL; - - WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); - - mutex_lock(&eventfs_mutex); - if (pei->is_freed || ei->is_freed) { - mutex_unlock(&eventfs_mutex); - return NULL; - } - if (ei->dentry) { - /* If the eventfs_inode already has a dentry, use it */ - dentry = ei->dentry; - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - mutex_unlock(&eventfs_mutex); - - dentry = create_dir(ei, parent); - - mutex_lock(&eventfs_mutex); - - if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { - /* - * When the mutex was released, something else could have - * created the dentry for this e_dentry. In which case - * use that one. - * - * If ei->is_freed is set, the e_dentry is currently on its - * way to being freed. - */ - dentry = ei->dentry; - if (dentry) - dget(dentry); - mutex_unlock(&eventfs_mutex); - return dentry; - } - - if (!ei->dentry && !ei->is_freed) { - ei->dentry = dentry; - eventfs_post_create_dir(ei); - dentry->d_fsdata = ei; - } else { - /* - * Should never happen unless we get here due to being freed. - * Otherwise it means two dentries exist with the same name. - */ - WARN_ON_ONCE(!ei->is_freed); - dentry = NULL; - } - mutex_unlock(&eventfs_mutex); + *e_dentry = dentry; // Remove me return dentry; } @@ -607,79 +462,49 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - const struct file_operations *fops; - const struct eventfs_entry *entry; struct eventfs_inode *ei_child; struct tracefs_inode *ti; struct eventfs_inode *ei; - struct dentry *ei_dentry = NULL; - struct dentry *ret = NULL; - struct dentry *d; const char *name = dentry->d_name.name; - umode_t mode; - void *data; - int idx; - int i; - int r; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) - return NULL; - - /* Grab srcu to prevent the ei from going away */ - idx = srcu_read_lock(&eventfs_srcu); + return ERR_PTR(-EIO); - /* - * Grab the eventfs_mutex to consistent value from ti->private. - * This s - */ mutex_lock(&eventfs_mutex); - ei = READ_ONCE(ti->private); - if (ei && !ei->is_freed) - ei_dentry = READ_ONCE(ei->dentry); - mutex_unlock(&eventfs_mutex); - if (!ei || !ei_dentry) + ei = ti->private; + if (!ei || ei->is_freed) goto out; - data = ei->data; - - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { + list_for_each_entry(ei_child, &ei->children, list) { if (strcmp(ei_child->name, name) != 0) continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) + if (ei_child->is_freed) goto out; - d = create_dir_dentry(ei, ei_child, ei_dentry); - dput(d); + lookup_dir_entry(dentry, ei, ei_child); goto out; } - for (i = 0; i < ei->nr_entries; i++) { - entry = &ei->entries[i]; - if (strcmp(name, entry->name) == 0) { - void *cdata = data; - mutex_lock(&eventfs_mutex); - /* If ei->is_freed, then the event itself may be too */ - if (!ei->is_freed) - r = entry->callback(name, &mode, &cdata, &fops); - else - r = -1; - mutex_unlock(&eventfs_mutex); - if (r <= 0) - continue; - ret = simple_lookup(dir, dentry, flags); - if (IS_ERR(ret)) - goto out; - d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); - dput(d); - break; - } + for (int i = 0; i < ei->nr_entries; i++) { + void *data; + umode_t mode; + const struct file_operations *fops; + const struct eventfs_entry *entry = &ei->entries[i]; + + if (strcmp(name, entry->name) != 0) + continue; + + data = ei->data; + if (entry->callback(name, &mode, &data, &fops) <= 0) + goto out; + + lookup_file_dentry(dentry, ei, i, mode, data, fops); + goto out; } out: - srcu_read_unlock(&eventfs_srcu, idx); - return ret; + mutex_unlock(&eventfs_mutex); + return NULL; } /* diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 888e420878477..5c84460feeebb 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -495,75 +495,6 @@ struct dentry *tracefs_end_creating(struct dentry *dentry) return dentry; } -/** - * eventfs_start_creating - start the process of creating a dentry - * @name: Name of the file created for the dentry - * @parent: The parent dentry where this dentry will be created - * - * This is a simple helper function for the dynamically created eventfs - * files. When the directory of the eventfs files are accessed, their - * dentries are created on the fly. This function is used to start that - * process. - */ -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent) -{ - struct dentry *dentry; - int error; - - /* Must always have a parent. */ - if (WARN_ON_ONCE(!parent)) - return ERR_PTR(-EINVAL); - - error = simple_pin_fs(&trace_fs_type, &tracefs_mount, - &tracefs_mount_count); - if (error) - return ERR_PTR(error); - - if (unlikely(IS_DEADDIR(parent->d_inode))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_one_len(name, parent, strlen(name)); - - if (!IS_ERR(dentry) && dentry->d_inode) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - - if (IS_ERR(dentry)) - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - - return dentry; -} - -/** - * eventfs_failed_creating - clean up a failed eventfs dentry creation - * @dentry: The dentry to clean up - * - * If after calling eventfs_start_creating(), a failure is detected, the - * resources created by eventfs_start_creating() needs to be cleaned up. In - * that case, this function should be called to perform that clean up. - */ -struct dentry *eventfs_failed_creating(struct dentry *dentry) -{ - dput(dentry); - simple_release_fs(&tracefs_mount, &tracefs_mount_count); - return NULL; -} - -/** - * eventfs_end_creating - Finish the process of creating a eventfs dentry - * @dentry: The dentry that has successfully been created. - * - * This function is currently just a place holder to match - * eventfs_start_creating(). In case any synchronization needs to be added, - * this function will be used to implement that without having to modify - * the callers of eventfs_start_creating(). - */ -struct dentry *eventfs_end_creating(struct dentry *dentry) -{ - return dentry; -} - /* Find the inode that this will use for default */ static struct inode *instance_inode(struct dentry *parent, struct inode *inode) { diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 7d84349ade872..09037e2c173dc 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -80,9 +80,6 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); -struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); -struct dentry *eventfs_failed_creating(struct dentry *dentry); -struct dentry *eventfs_end_creating(struct dentry *dentry); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ -- GitLab From 408600be78cdb8c650a97ecc7ff411cb216811b5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:23 -0500 Subject: [PATCH 0519/1405] eventfs: Remove unused d_parent pointer field It's never used Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185512.961772428@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 4 +--- fs/tracefs/internal.h | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 04c2ab90f93e6..16ca8d9759b18 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -680,10 +680,8 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode INIT_LIST_HEAD(&ei->list); mutex_lock(&eventfs_mutex); - if (!parent->is_freed) { + if (!parent->is_freed) list_add_tail(&ei->list, &parent->children); - ei->d_parent = parent->dentry; - } mutex_unlock(&eventfs_mutex); /* Was the parent freed? */ diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 09037e2c173dc..932733a2696a1 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -36,7 +36,6 @@ struct eventfs_attr { * @name: the name of the directory to create * @children: link list into the child eventfs_inode * @dentry: the dentry of the directory - * @d_parent: pointer to the parent's dentry * @d_children: The array of dentries to represent the files when created * @entry_attrs: Saved mode and ownership of the @d_children * @attr: Saved mode and ownership of eventfs_inode itself @@ -51,7 +50,6 @@ struct eventfs_inode { const char *name; struct list_head children; struct dentry *dentry; /* Check is_freed to access */ - struct dentry *d_parent; struct dentry **d_children; struct eventfs_attr *entry_attrs; struct eventfs_attr attr; -- GitLab From 8dce06e98c70a7fcbb4bca7d90faf40522e65c58 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:24 -0500 Subject: [PATCH 0520/1405] eventfs: Clean up dentry ops and add revalidate function In order for the dentries to stay up-to-date with the eventfs changes, just add a 'd_revalidate' function that checks the 'is_freed' bit. Also, clean up the dentry release to actually use d_release() rather than the slightly odd d_iput() function. We don't care about the inode, all we want to do is to get rid of the refcount to the eventfs data added by dentry->d_fsdata. It would probably be cleaner to make eventfs its own filesystem, or at least set its own dentry ops when looking up eventfs files. But as it is, only eventfs dentries use d_fsdata, so we don't really need to split these things up by use. Another thing that might be worth doing is to make all eventfs lookups mark their dentries as not worth caching. We could do that with d_delete(), but the DCACHE_DONTCACHE flag would likely be even better. As it is, the dentries are all freeable, but they only tend to get freed at memory pressure rather than more proactively. But that's a separate issue. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.124644253@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 5 ++--- fs/tracefs/inode.c | 27 ++++++++++++++++++--------- fs/tracefs/internal.h | 3 ++- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 16ca8d9759b18..b2285d5f3fedf 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -378,13 +378,12 @@ static void free_ei(struct eventfs_inode *ei) } /** - * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode - * @ti: the tracefs_inode of the dentry + * eventfs_d_release - dentry is going away * @dentry: dentry which has the reference to remove. * * Remove the association between a dentry from an eventfs_inode. */ -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) +void eventfs_d_release(struct dentry *dentry) { struct eventfs_inode *ei; int i; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5c84460feeebb..d65ffad4c327c 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -377,21 +377,30 @@ static const struct super_operations tracefs_super_operations = { .show_options = tracefs_show_options, }; -static void tracefs_dentry_iput(struct dentry *dentry, struct inode *inode) +/* + * It would be cleaner if eventfs had its own dentry ops. + * + * Note that d_revalidate is called potentially under RCU, + * so it can't take the eventfs mutex etc. It's fine - if + * we open a file just as it's marked dead, things will + * still work just fine, and just see the old stale case. + */ +static void tracefs_d_release(struct dentry *dentry) { - struct tracefs_inode *ti; + if (dentry->d_fsdata) + eventfs_d_release(dentry); +} - if (!dentry || !inode) - return; +static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct eventfs_inode *ei = dentry->d_fsdata; - ti = get_tracefs(inode); - if (ti && ti->flags & TRACEFS_EVENT_INODE) - eventfs_set_ei_status_free(ti, dentry); - iput(inode); + return !(ei && ei->is_freed); } static const struct dentry_operations tracefs_dentry_operations = { - .d_iput = tracefs_dentry_iput, + .d_revalidate = tracefs_d_revalidate, + .d_release = tracefs_d_release, }; static int trace_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 932733a2696a1..4b50a0668055b 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -78,6 +78,7 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); -void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); + +void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ -- GitLab From 43aa6f97c2d03a52c1ddb86768575fc84344bdbb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Jan 2024 13:49:25 -0500 Subject: [PATCH 0521/1405] eventfs: Get rid of dentry pointers without refcounts The eventfs inode had pointers to dentries (and child dentries) without actually holding a refcount on said pointer. That is fundamentally broken, and while eventfs tried to then maintain coherence with dentries going away by hooking into the '.d_iput' callback, that doesn't actually work since it's not ordered wrt lookups. There were two reasonms why eventfs tried to keep a pointer to a dentry: - the creation of a 'events' directory would actually have a stable dentry pointer that it created with tracefs_start_creating(). And it needed that dentry when tearing it all down again in eventfs_remove_events_dir(). This use is actually ok, because the special top-level events directory dentries are actually stable, not just a temporary cache of the eventfs data structures. - the 'eventfs_inode' (aka ei) needs to stay around as long as there are dentries that refer to it. It then used these dentry pointers as a replacement for doing reference counting: it would try to make sure that there was only ever one dentry associated with an event_inode, and keep a child dentry array around to see which dentries might still refer to the parent ei. This gets rid of the invalid dentry pointer use, and renames the one valid case to a different name to make it clear that it's not just any random dentry. The magic child dentry array that is kind of a "reverse reference list" is simply replaced by having child dentries take a ref to the ei. As does the directory dentries. That makes the broken use case go away. Link: https://lore.kernel.org/linux-trace-kernel/202401291043.e62e89dc-oliver.sang@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20240131185513.280463000@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Cc: Greg Kroah-Hartman Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Signed-off-by: Linus Torvalds Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 248 ++++++++++++--------------------------- fs/tracefs/internal.h | 7 +- 2 files changed, 78 insertions(+), 177 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index b2285d5f3fedf..515fdace1eeaf 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -62,6 +62,35 @@ enum { #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) +/* + * eventfs_inode reference count management. + * + * NOTE! We count only references from dentries, in the + * form 'dentry->d_fsdata'. There are also references from + * directory inodes ('ti->private'), but the dentry reference + * count is always a superset of the inode reference count. + */ +static void release_ei(struct kref *ref) +{ + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + kfree(ei->entry_attrs); + kfree_const(ei->name); + kfree_rcu(ei, rcu); +} + +static inline void put_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_put(&ei->kref, release_ei); +} + +static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_get(&ei->kref); + return ei; +} + static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); @@ -289,7 +318,8 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, * directory. The inode.i_private pointer will point to @data in the open() * call. */ -static struct dentry *lookup_file(struct dentry *dentry, +static struct dentry *lookup_file(struct eventfs_inode *parent_ei, + struct dentry *dentry, umode_t mode, struct eventfs_attr *attr, void *data, @@ -302,7 +332,7 @@ static struct dentry *lookup_file(struct dentry *dentry, mode |= S_IFREG; if (WARN_ON_ONCE(!S_ISREG(mode))) - return NULL; + return ERR_PTR(-EIO); inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) @@ -321,9 +351,12 @@ static struct dentry *lookup_file(struct dentry *dentry, ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE; + // Files have their parent's ei as their fsdata + dentry->d_fsdata = get_ei(parent_ei); + d_add(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); - return dentry; + return NULL; }; /** @@ -359,22 +392,29 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, /* Only directories have ti->private set to an ei, not files */ ti->private = ei; - dentry->d_fsdata = ei; - ei->dentry = dentry; // Remove me! + dentry->d_fsdata = get_ei(ei); inc_nlink(inode); d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); - return dentry; + return NULL; } -static void free_ei(struct eventfs_inode *ei) +static inline struct eventfs_inode *alloc_ei(const char *name) { - kfree_const(ei->name); - kfree(ei->d_children); - kfree(ei->entry_attrs); - kfree(ei); + struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); + + if (!ei) + return NULL; + + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) { + kfree(ei); + return NULL; + } + kref_init(&ei->kref); + return ei; } /** @@ -385,39 +425,13 @@ static void free_ei(struct eventfs_inode *ei) */ void eventfs_d_release(struct dentry *dentry) { - struct eventfs_inode *ei; - int i; - - mutex_lock(&eventfs_mutex); - - ei = dentry->d_fsdata; - if (!ei) - goto out; - - /* This could belong to one of the files of the ei */ - if (ei->dentry != dentry) { - for (i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i] == dentry) - break; - } - if (WARN_ON_ONCE(i == ei->nr_entries)) - goto out; - ei->d_children[i] = NULL; - } else if (ei->is_freed) { - free_ei(ei); - } else { - ei->dentry = NULL; - } - - dentry->d_fsdata = NULL; - out: - mutex_unlock(&eventfs_mutex); + put_ei(dentry->d_fsdata); } /** * lookup_file_dentry - create a dentry for a file of an eventfs_inode * @ei: the eventfs_inode that the file will be created under - * @idx: the index into the d_children[] of the @ei + * @idx: the index into the entry_attrs[] of the @ei * @parent: The parent dentry of the created file. * @name: The name of the file to create * @mode: The mode of the file. @@ -434,17 +448,11 @@ lookup_file_dentry(struct dentry *dentry, const struct file_operations *fops) { struct eventfs_attr *attr = NULL; - struct dentry **e_dentry = &ei->d_children[idx]; if (ei->entry_attrs) attr = &ei->entry_attrs[idx]; - dentry->d_fsdata = ei; // NOTE: ei of _parent_ - lookup_file(dentry, mode, attr, data, fops); - - *e_dentry = dentry; // Remove me - - return dentry; + return lookup_file(ei, dentry, mode, attr, data, fops); } /** @@ -465,6 +473,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, struct tracefs_inode *ti; struct eventfs_inode *ei; const char *name = dentry->d_name.name; + struct dentry *result = NULL; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) @@ -481,7 +490,7 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, continue; if (ei_child->is_freed) goto out; - lookup_dir_entry(dentry, ei, ei_child); + result = lookup_dir_entry(dentry, ei, ei_child); goto out; } @@ -498,12 +507,12 @@ static struct dentry *eventfs_root_lookup(struct inode *dir, if (entry->callback(name, &mode, &data, &fops) <= 0) goto out; - lookup_file_dentry(dentry, ei, i, mode, data, fops); + result = lookup_file_dentry(dentry, ei, i, mode, data, fops); goto out; } out: mutex_unlock(&eventfs_mutex); - return NULL; + return result; } /* @@ -653,25 +662,10 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode if (!parent) return ERR_PTR(-EINVAL); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) return ERR_PTR(-ENOMEM); - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) { - kfree(ei); - return ERR_PTR(-ENOMEM); - } - - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); - if (!ei->d_children) { - kfree_const(ei->name); - kfree(ei); - return ERR_PTR(-ENOMEM); - } - } - ei->entries = entries; ei->nr_entries = size; ei->data = data; @@ -685,7 +679,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - free_ei(ei); + put_ei(ei); ei = NULL; } return ei; @@ -720,28 +714,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry if (IS_ERR(dentry)) return ERR_CAST(dentry); - ei = kzalloc(sizeof(*ei), GFP_KERNEL); + ei = alloc_ei(name); if (!ei) - goto fail_ei; + goto fail; inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) goto fail; - if (size) { - ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); - if (!ei->d_children) - goto fail; - } - - ei->dentry = dentry; + // Note: we have a ref to the dentry from tracefs_start_creating() + ei->events_dir = dentry; ei->entries = entries; ei->nr_entries = size; ei->is_events = 1; ei->data = data; - ei->name = kstrdup_const(name, GFP_KERNEL); - if (!ei->name) - goto fail; /* Save the ownership of this directory */ uid = d_inode(dentry->d_parent)->i_uid; @@ -772,7 +758,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; - dentry->d_fsdata = ei; + dentry->d_fsdata = get_ei(ei); /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); @@ -784,72 +770,11 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - kfree(ei->d_children); - kfree(ei); - fail_ei: + put_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } -static LLIST_HEAD(free_list); - -static void eventfs_workfn(struct work_struct *work) -{ - struct eventfs_inode *ei, *tmp; - struct llist_node *llnode; - - llnode = llist_del_all(&free_list); - llist_for_each_entry_safe(ei, tmp, llnode, llist) { - /* This dput() matches the dget() from unhook_dentry() */ - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) - dput(ei->d_children[i]); - } - /* This should only get here if it had a dentry */ - if (!WARN_ON_ONCE(!ei->dentry)) - dput(ei->dentry); - } -} - -static DECLARE_WORK(eventfs_work, eventfs_workfn); - -static void free_rcu_ei(struct rcu_head *head) -{ - struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); - - if (ei->dentry) { - /* Do not free the ei until all references of dentry are gone */ - if (llist_add(&ei->llist, &free_list)) - queue_work(system_unbound_wq, &eventfs_work); - return; - } - - /* If the ei doesn't have a dentry, neither should its children */ - for (int i = 0; i < ei->nr_entries; i++) { - WARN_ON_ONCE(ei->d_children[i]); - } - - free_ei(ei); -} - -static void unhook_dentry(struct dentry *dentry) -{ - if (!dentry) - return; - /* - * Need to add a reference to the dentry that is expected by - * simple_recursive_removal(), which will include a dput(). - */ - dget(dentry); - - /* - * Also add a reference for the dput() in eventfs_workfn(). - * That is required as that dput() will free the ei after - * the SRCU grace period is over. - */ - dget(dentry); -} - /** * eventfs_remove_rec - remove eventfs dir or file from list * @ei: eventfs_inode to be removed. @@ -862,8 +787,6 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) { struct eventfs_inode *ei_child; - if (!ei) - return; /* * Check recursion depth. It should never be greater than 3: * 0 - events/ @@ -875,28 +798,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) return; /* search for nested folders or files */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - lockdep_is_held(&eventfs_mutex)) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(ei_child->dentry && !ei->dentry); + list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - } - ei->is_freed = 1; - - for (int i = 0; i < ei->nr_entries; i++) { - if (ei->d_children[i]) { - /* Children only have dentry if parent does */ - WARN_ON_ONCE(!ei->dentry); - unhook_dentry(ei->d_children[i]); - } - } - - unhook_dentry(ei->dentry); - - list_del_rcu(&ei->list); - call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); + list_del(&ei->list); + put_ei(ei); } /** @@ -907,22 +814,12 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) */ void eventfs_remove_dir(struct eventfs_inode *ei) { - struct dentry *dentry; - if (!ei) return; mutex_lock(&eventfs_mutex); - dentry = ei->dentry; eventfs_remove_rec(ei, 0); mutex_unlock(&eventfs_mutex); - - /* - * If any of the ei children has a dentry, then the ei itself - * must have a dentry. - */ - if (dentry) - simple_recursive_removal(dentry, NULL); } /** @@ -935,7 +832,11 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) { struct dentry *dentry; - dentry = ei->dentry; + dentry = ei->events_dir; + if (!dentry) + return; + + ei->events_dir = NULL; eventfs_remove_dir(ei); /* @@ -945,5 +846,6 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) * sticks around while the other ei->dentry are created * and destroyed dynamically. */ + d_invalidate(dentry); dput(dentry); } diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 4b50a0668055b..1886f1826cd83 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -35,8 +35,7 @@ struct eventfs_attr { * @entries: the array of entries representing the files in the directory * @name: the name of the directory to create * @children: link list into the child eventfs_inode - * @dentry: the dentry of the directory - * @d_children: The array of dentries to represent the files when created + * @events_dir: the dentry of the events directory * @entry_attrs: Saved mode and ownership of the @d_children * @attr: Saved mode and ownership of eventfs_inode itself * @data: The private data to pass to the callbacks @@ -45,12 +44,12 @@ struct eventfs_attr { * @nr_entries: The number of items in @entries */ struct eventfs_inode { + struct kref kref; struct list_head list; const struct eventfs_entry *entries; const char *name; struct list_head children; - struct dentry *dentry; /* Check is_freed to access */ - struct dentry **d_children; + struct dentry *events_dir; struct eventfs_attr *entry_attrs; struct eventfs_attr attr; void *data; -- GitLab From 70fbfc47a392b98e5f8dba70c6efc6839205c982 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:01 +0100 Subject: [PATCH 0522/1405] nvme-fc: do not wait in vain when unloading module The module exit path has race between deleting all controllers and freeing 'left over IDs'. To prevent double free a synchronization between nvme_delete_ctrl and ida_destroy has been added by the initial commit. There is some logic around trying to prevent from hanging forever in wait_for_completion, though it does not handling all cases. E.g. blktests is able to reproduce the situation where the module unload hangs forever. If we completely rely on the cleanup code executed from the nvme_delete_ctrl path, all IDs will be freed eventually. This makes calling ida_destroy unnecessary. We only have to ensure that all nvme_delete_ctrl code has been executed before we leave nvme_fc_exit_module. This is done by flushing the nvme_delete_wq workqueue. While at it, remove the unused nvme_fc_wq workqueue too. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 47 ++++++------------------------------------ 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 63a2e2839a789..5630d5689f28a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -221,11 +221,6 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); -static struct workqueue_struct *nvme_fc_wq; - -static bool nvme_fc_waiting_to_unload; -static DECLARE_COMPLETION(nvme_fc_unload_proceed); - /* * These items are short-term. They will eventually be moved into * a generic FC class. See comments in module init. @@ -255,8 +250,6 @@ nvme_fc_free_lport(struct kref *ref) /* remove from transport list */ spin_lock_irqsave(&nvme_fc_lock, flags); list_del(&lport->port_list); - if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list)) - complete(&nvme_fc_unload_proceed); spin_unlock_irqrestore(&nvme_fc_lock, flags); ida_free(&nvme_fc_local_port_cnt, lport->localport.port_num); @@ -3896,10 +3889,6 @@ static int __init nvme_fc_init_module(void) { int ret; - nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); - if (!nvme_fc_wq) - return -ENOMEM; - /* * NOTE: * It is expected that in the future the kernel will combine @@ -3917,7 +3906,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - goto out_destroy_wq; + return ret; } /* @@ -3941,8 +3930,6 @@ static int __init nvme_fc_init_module(void) device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); -out_destroy_wq: - destroy_workqueue(nvme_fc_wq); return ret; } @@ -3962,45 +3949,23 @@ nvme_fc_delete_controllers(struct nvme_fc_rport *rport) spin_unlock(&rport->lock); } -static void -nvme_fc_cleanup_for_unload(void) +static void __exit nvme_fc_exit_module(void) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; - - list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { - list_for_each_entry(rport, &lport->endp_list, endp_list) { - nvme_fc_delete_controllers(rport); - } - } -} - -static void __exit nvme_fc_exit_module(void) -{ unsigned long flags; - bool need_cleanup = false; spin_lock_irqsave(&nvme_fc_lock, flags); - nvme_fc_waiting_to_unload = true; - if (!list_empty(&nvme_fc_lport_list)) { - need_cleanup = true; - nvme_fc_cleanup_for_unload(); - } + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) + list_for_each_entry(rport, &lport->endp_list, endp_list) + nvme_fc_delete_controllers(rport); spin_unlock_irqrestore(&nvme_fc_lock, flags); - if (need_cleanup) { - pr_info("%s: waiting for ctlr deletes\n", __func__); - wait_for_completion(&nvme_fc_unload_proceed); - pr_info("%s: ctrl deletes complete\n", __func__); - } + flush_workqueue(nvme_delete_wq); nvmf_unregister_transport(&nvme_fc_transport); - ida_destroy(&nvme_fc_local_port_cnt); - ida_destroy(&nvme_fc_ctrl_cnt); - device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); - destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); -- GitLab From dcfad4ab4d6733f2861cd241d8532a0004fc835a Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:02 +0100 Subject: [PATCH 0523/1405] nvmet-fcloop: swap the list_add_tail arguments The first argument of list_add_tail function is the new element which should be added to the list which is the second argument. Swap the arguments to allow processing more than one element at a time. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fcloop.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index c1faeb1e9e556..1471af250ea62 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -358,7 +358,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, if (!rport->targetport) { tls_req->status = -ECONNREFUSED; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); return ret; @@ -391,7 +391,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, if (remoteport) { rport = remoteport->private; spin_lock(&rport->lock); - list_add_tail(&rport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &rport->ls_list); spin_unlock(&rport->lock); queue_work(nvmet_wq, &rport->ls_work); } @@ -446,7 +446,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, if (!tport->remoteport) { tls_req->status = -ECONNREFUSED; spin_lock(&tport->lock); - list_add_tail(&tport->ls_list, &tls_req->ls_list); + list_add_tail(&tls_req->ls_list, &tport->ls_list); spin_unlock(&tport->lock); queue_work(nvmet_wq, &tport->ls_work); return ret; -- GitLab From c691e6d7e13dab81ac8c7489c83b5dea972522a5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:03 +0100 Subject: [PATCH 0524/1405] nvmet-fc: release reference on target port In case we return early out of __nvmet_fc_finish_ls_req() we still have to release the reference on the target port. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 05e6a755b3306..3d53d9dc10995 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -360,7 +360,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - return; + goto out_puttgtport; } list_del(&lsop->lsreq_list); @@ -373,6 +373,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); +out_puttgtport: nvmet_fc_tgtport_put(tgtport); } -- GitLab From 4049dc96b8de7aeb3addcea039446e464726a525 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:04 +0100 Subject: [PATCH 0525/1405] nvmet-fc: defer cleanup using RCU properly When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use it. To address this, nvmet_fc_find_target_assoc should not be able to lookup an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it and remove the association from the list and wait for a grace period before cleaning up all. This means we also can drop the RCU management on the queues, because this is now handled via the association itself. A second step split the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done to a workqueue. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 83 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3d53d9dc10995..11ecfef41bd15 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -166,7 +166,7 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_hostport *hostport; struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; - struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1]; + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; struct rcu_head rcu; @@ -803,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue) return NULL; - if (!nvmet_fc_tgt_a_get(assoc)) - goto out_free_queue; - queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0, assoc->tgtport->fc_target_port.port_num, assoc->a_id, qid); if (!queue->work_q) - goto out_a_put; + goto out_free_queue; queue->qid = qid; queue->sqsize = sqsize; @@ -832,15 +829,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, goto out_fail_iodlist; WARN_ON(assoc->queues[qid]); - rcu_assign_pointer(assoc->queues[qid], queue); + assoc->queues[qid] = queue; return queue; out_fail_iodlist: nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue); destroy_workqueue(queue->work_q); -out_a_put: - nvmet_fc_tgt_a_put(assoc); out_free_queue: kfree(queue); return NULL; @@ -853,12 +848,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref) struct nvmet_fc_tgt_queue *queue = container_of(ref, struct nvmet_fc_tgt_queue, ref); - rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL); - nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue); - nvmet_fc_tgt_a_put(queue->assoc); - destroy_workqueue(queue->work_q); kfree_rcu(queue, rcu); @@ -970,7 +961,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (association_id == assoc->association_id) { - queue = rcu_dereference(assoc->queues[qid]); + queue = assoc->queues[qid]; if (queue && (!atomic_read(&queue->connected) || !nvmet_fc_tgt_q_get(queue))) @@ -1173,13 +1164,18 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_ls_iod *oldls; unsigned long flags; + int i; + + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + nvmet_fc_delete_target_queue(assoc->queues[i]); + } /* Send Disconnect now that all i/o has completed */ nvmet_fc_xmt_disconnect_assoc(assoc); nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); - list_del_rcu(&assoc->a_list); oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); /* if pending Rcv Disconnect Association LS, send rsp now */ @@ -1209,7 +1205,7 @@ static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) { struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - struct nvmet_fc_tgt_queue *queue; + unsigned long flags; int i, terminating; terminating = atomic_xchg(&assoc->terminating, 1); @@ -1218,29 +1214,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) if (terminating) return; + spin_lock_irqsave(&tgtport->lock, flags); + list_del_rcu(&assoc->a_list); + spin_unlock_irqrestore(&tgtport->lock, flags); - for (i = NVMET_NR_QUEUES; i >= 0; i--) { - rcu_read_lock(); - queue = rcu_dereference(assoc->queues[i]); - if (!queue) { - rcu_read_unlock(); - continue; - } + synchronize_rcu(); - if (!nvmet_fc_tgt_q_get(queue)) { - rcu_read_unlock(); - continue; - } - rcu_read_unlock(); - nvmet_fc_delete_target_queue(queue); - nvmet_fc_tgt_q_put(queue); + /* ensure all in-flight I/Os have been processed */ + for (i = NVMET_NR_QUEUES; i >= 0; i--) { + if (assoc->queues[i]) + flush_workqueue(assoc->queues[i]->work_q); } dev_info(tgtport->dev, "{%d:%d} Association deleted\n", tgtport->fc_target_port.port_num, assoc->a_id); - - nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * @@ -1493,9 +1481,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); } @@ -1548,9 +1535,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1582,7 +1568,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) rcu_read_lock(); list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { - queue = rcu_dereference(assoc->queues[0]); + queue = assoc->queues[0]; if (queue && queue->nvme_sq.ctrl == ctrl) { if (nvmet_fc_tgt_a_get(assoc)) found_ctrl = true; @@ -1594,9 +1580,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!queue_work(nvmet_wq, &assoc->del_work)) - /* already deleting - release local reference */ - nvmet_fc_tgt_a_put(assoc); + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); return; } @@ -1626,6 +1611,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + flush_workqueue(nvmet_wq); + /* * should terminate LS's as well. However, LS's will be generated * at the tail end of association termination, so they likely don't @@ -1871,9 +1858,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); - /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(assoc); - /* * The rules for LS response says the response cannot * go back until ABTS's have been sent for all outstanding @@ -1888,8 +1872,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, assoc->rcv_disconn = iod; spin_unlock_irqrestore(&tgtport->lock, flags); - nvmet_fc_delete_target_assoc(assoc); - if (oldls) { dev_info(tgtport->dev, "{%d:%d} Multiple Disconnect Association LS's " @@ -1905,6 +1887,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } + queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_tgt_a_put(assoc); + return false; } @@ -2903,6 +2888,9 @@ nvmet_fc_remove_port(struct nvmet_port *port) nvmet_fc_portentry_unbind(pe); + /* terminate any outstanding associations */ + __nvmet_fc_free_assocs(pe->tgtport); + kfree(pe); } @@ -2934,6 +2922,9 @@ static int __init nvmet_fc_init_module(void) static void __exit nvmet_fc_exit_module(void) { + /* ensure any shutdown operation, e.g. delete ctrls have finished */ + flush_workqueue(nvmet_wq); + /* sanity check - all lports should be removed */ if (!list_empty(&nvmet_fc_target_list)) pr_warn("%s: targetport list not empty\n", __func__); -- GitLab From c5e27b1a779ec25779d04c3af65aebaee6bd4304 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:05 +0100 Subject: [PATCH 0526/1405] nvmet-fc: free queue and assoc directly Neither struct nvmet_fc_tgt_queue nor struct nvmet_fc_tgt_assoc are data structure which are used in a RCU context. So there is no reason to delay the free operation. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 11ecfef41bd15..b44b99525c446 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -145,7 +145,6 @@ struct nvmet_fc_tgt_queue { struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; - struct rcu_head rcu; /* array of fcp_iods */ struct nvmet_fc_fcp_iod fod[] __counted_by(sqsize); } __aligned(sizeof(unsigned long long)); @@ -169,7 +168,6 @@ struct nvmet_fc_tgt_assoc { struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; - struct rcu_head rcu; }; @@ -852,7 +850,7 @@ nvmet_fc_tgt_queue_free(struct kref *ref) destroy_workqueue(queue->work_q); - kfree_rcu(queue, rcu); + kfree(queue); } static void @@ -1185,8 +1183,8 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - kfree_rcu(assoc, rcu); nvmet_fc_tgtport_put(tgtport); + kfree(assoc); } static void -- GitLab From ca121a0f7515591dba0eb5532bfa7ace4dc153ce Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:06 +0100 Subject: [PATCH 0527/1405] nvmet-fc: hold reference on hostport match The hostport data structure is shared between the association, this why we keep track of the users via a refcount. So we should not decrement the refcount on a match and free the hostport several times. Reported by KASAN. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b44b99525c446..205a12b1e8413 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1068,8 +1068,6 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) /* new allocation not needed */ kfree(newhost); newhost = match; - /* no new allocation - release reference */ - nvmet_fc_tgtport_put(tgtport); } else { newhost->tgtport = tgtport; newhost->hosthandle = hosthandle; -- GitLab From 50b474e1faff50f770410f402ebbdf48bf8cc9b0 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:07 +0100 Subject: [PATCH 0528/1405] nvmet-fc: remove null hostport pointer check An association has always a valid hostport pointer. Remove useless null pointer check. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 205a12b1e8413..849d9b17c60ad 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -488,8 +488,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) * message is normal. Otherwise, send unless the hostport has * already been invalidated by the lldd. */ - if (!tgtport->ops->ls_req || !assoc->hostport || - assoc->hostport->invalid) + if (!tgtport->ops->ls_req || assoc->hostport->invalid) return; lsop = kzalloc((sizeof(*lsop) + @@ -1524,8 +1523,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { - if (!assoc->hostport || - assoc->hostport->hosthandle != hosthandle) + if (assoc->hostport->hosthandle != hosthandle) continue; if (!nvmet_fc_tgt_a_get(assoc)) continue; -- GitLab From 1c110588dd95d21782397ff3cbaa55820b4e1fad Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:08 +0100 Subject: [PATCH 0529/1405] nvmet-fc: do not tack refs on tgtports from assoc The association life time is tied to the life time of the target port. That means we should not take extra a refcount when creating a association. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 849d9b17c60ad..fe3246024836e 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1107,12 +1107,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) if (idx < 0) goto out_free_assoc; - if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida; - assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); if (IS_ERR(assoc->hostport)) - goto out_put; + goto out_ida; assoc->tgtport = tgtport; assoc->a_id = idx; @@ -1142,8 +1139,6 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) return assoc; -out_put: - nvmet_fc_tgtport_put(tgtport); out_ida: ida_free(&tgtport->assoc_cnt, idx); out_free_assoc: @@ -1180,7 +1175,6 @@ nvmet_fc_target_assoc_free(struct kref *ref) dev_info(tgtport->dev, "{%d:%d} Association freed\n", tgtport->fc_target_port.port_num, assoc->a_id); - nvmet_fc_tgtport_put(tgtport); kfree(assoc); } -- GitLab From 3146345c2e9c2f661527054e402b0cfad80105a4 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:09 +0100 Subject: [PATCH 0530/1405] nvmet-fc: abort command when there is no binding When the target port has not active port binding, there is no point in trying to process the command as it has to fail anyway. Instead adding checks to all commands abort the command early. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index fe3246024836e..c80b8a066fd17 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1099,6 +1099,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) int idx; bool needrandom = true; + if (!tgtport->pe) + return NULL; + assoc = kzalloc(sizeof(*assoc), GFP_KERNEL); if (!assoc) return NULL; @@ -2514,8 +2517,9 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.cmd = &fod->cmdiubuf.sqe; fod->req.cqe = &fod->rspiubuf.cqe; - if (tgtport->pe) - fod->req.port = tgtport->pe->port; + if (!tgtport->pe) + goto transport_error; + fod->req.port = tgtport->pe->port; /* clear any response payload */ memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); -- GitLab From 710c69dbaccdac312e32931abcb8499c1525d397 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:10 +0100 Subject: [PATCH 0531/1405] nvmet-fc: avoid deadlock on delete association path When deleting an association the shutdown path is deadlocking because we try to flush the nvmet_wq nested. Avoid this by deadlock by deferring the put work into its own work item. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index c80b8a066fd17..3e0d391e631b1 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -111,6 +111,8 @@ struct nvmet_fc_tgtport { struct nvmet_fc_port_entry *pe; struct kref ref; u32 max_sg_cnt; + + struct work_struct put_work; }; struct nvmet_fc_port_entry { @@ -247,6 +249,13 @@ static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_put_tgtport_work(struct work_struct *work) +{ + struct nvmet_fc_tgtport *tgtport = + container_of(work, struct nvmet_fc_tgtport, put_work); + + nvmet_fc_tgtport_put(tgtport); +} static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); @@ -358,7 +367,7 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) if (!lsop->req_queued) { spin_unlock_irqrestore(&tgtport->lock, flags); - goto out_puttgtport; + goto out_putwork; } list_del(&lsop->lsreq_list); @@ -371,8 +380,8 @@ __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); -out_puttgtport: - nvmet_fc_tgtport_put(tgtport); +out_putwork: + queue_work(nvmet_wq, &tgtport->put_work); } static int @@ -1396,6 +1405,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; + INIT_WORK(&newrec->put_work, nvmet_fc_put_tgtport_work); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { -- GitLab From fe506a74589326183297d5abdda02d0c76ae5a8b Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:11 +0100 Subject: [PATCH 0532/1405] nvmet-fc: take ref count on tgtport before delete assoc We have to ensure that the tgtport is not going away before be have remove all the associations. Reviewed-by: Christoph Hellwig Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3e0d391e631b1..671d096745a5b 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1090,13 +1090,28 @@ nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) } static void -nvmet_fc_delete_assoc(struct work_struct *work) +nvmet_fc_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_delete_target_assoc(assoc); + nvmet_fc_tgt_a_put(assoc); +} + +static void +nvmet_fc_delete_assoc_work(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; - nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_tgt_a_put(assoc); + nvmet_fc_delete_assoc(assoc); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + nvmet_fc_tgtport_get(assoc->tgtport); + queue_work(nvmet_wq, &assoc->del_work); } static struct nvmet_fc_tgt_assoc * @@ -1127,7 +1142,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); - INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); while (needrandom) { @@ -1483,7 +1498,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } rcu_read_unlock(); @@ -1536,7 +1551,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); } spin_unlock_irqrestore(&tgtport->lock, flags); @@ -1581,7 +1596,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return; } @@ -1888,7 +1903,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_xmt_ls_rsp(tgtport, oldls); } - queue_work(nvmet_wq, &assoc->del_work); + nvmet_fc_schedule_delete_assoc(assoc); nvmet_fc_tgt_a_put(assoc); return false; -- GitLab From a90ac7b348770ff3d246fac575306783de381e51 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 31 Jan 2024 09:51:12 +0100 Subject: [PATCH 0533/1405] nvmet-fc: use RCU list iterator for assoc_list The assoc_list is a RCU protected list, thus use the RCU flavor of list functions. Let's use this opportunity and refactor this code and move the lookup into a helper and give it a descriptive name. Signed-off-by: Daniel Wagner Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 671d096745a5b..fd229f310c931 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1114,14 +1114,27 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc) queue_work(nvmet_wq, &assoc->del_work); } +static bool +nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id) +{ + struct nvmet_fc_tgt_assoc *a; + + list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) { + if (association_id == a->association_id) + return true; + } + + return false; +} + static struct nvmet_fc_tgt_assoc * nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { - struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; + struct nvmet_fc_tgt_assoc *assoc; unsigned long flags; + bool done; u64 ran; int idx; - bool needrandom = true; if (!tgtport->pe) return NULL; @@ -1145,24 +1158,21 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc_work); atomic_set(&assoc->terminating, 0); - while (needrandom) { + done = false; + do { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); ran = ran << BYTES_FOR_QID_SHIFT; spin_lock_irqsave(&tgtport->lock, flags); - needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { - if (ran == tmpassoc->association_id) { - needrandom = true; - break; - } - } - if (!needrandom) { + rcu_read_lock(); + if (!nvmet_fc_assoc_exits(tgtport, ran)) { assoc->association_id = ran; list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list); + done = true; } + rcu_read_unlock(); spin_unlock_irqrestore(&tgtport->lock, flags); - } + } while (!done); return assoc; -- GitLab From 524719b4c60dc5c5edff0cdfd715b4e2d6c16ede Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:37 +0530 Subject: [PATCH 0534/1405] nvme-tcp: show hostnqn when connecting to tcp target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 9f8dea2e2c7d5..a6d596e056021 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2753,8 +2753,8 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_tcp_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); -- GitLab From d2045e6a4e2f3bcb3a2dab6684f8fbc95239d566 Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:38 +0530 Subject: [PATCH 0535/1405] nvme-rdma: show hostnqn when connecting to rdma target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6adf2c19a7120..20fdd40b1879f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2300,8 +2300,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (ret) goto out_uninit_ctrl; - dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", - nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); + dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs, hostnqn: %s\n", + nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); mutex_lock(&nvme_rdma_ctrl_mutex); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); -- GitLab From c3a846fe4bf3ecbdf48e561b90fea6cc27ac6423 Mon Sep 17 00:00:00 2001 From: "Nitin U. Yewale" Date: Mon, 29 Jan 2024 16:36:39 +0530 Subject: [PATCH 0536/1405] nvme-fc: show hostnqn when connecting to fc target Log hostnqn when connecting to nvme target. As hostnqn could be changed, logging this information in syslog at appropriate time may help in troubleshooting. Signed-off-by: Nitin U. Yewale Reviewed-by: John Meneghini Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5630d5689f28a..68a5d971657bb 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3570,8 +3570,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, flush_delayed_work(&ctrl->connect_work); dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl)); + "NVME-FC{%d}: new ctrl: NQN \"%s\", hostnqn: %s\n", + ctrl->cnum, nvmf_ctrl_subsysnqn(&ctrl->ctrl), opts->host->nqn); return &ctrl->ctrl; -- GitLab From 9f079dda14339ee87d864306a9dc8c6b4e4da40b Mon Sep 17 00:00:00 2001 From: Alan Adamson Date: Mon, 29 Jan 2024 16:19:38 -0800 Subject: [PATCH 0537/1405] nvme: allow passthru cmd error logging Commit d7ac8dca938c ("nvme: quiet user passthrough command errors") disabled error logging for user passthrough commands. This commit adds the ability to opt-in to passthrough admin error logging. IO commands initiated as passthrough will always be logged. The logging output for passthrough commands (Admin and IO) has been changed to include CDWXX fields. nvme0n1: Read(0x2), LBA Out of Range (sct 0x0 / sc 0x80) DNR cdw10=0x0 cdw11=0x1 cdw12=0x70000 cdw13=0x0 cdw14=0x0 cdw15=0x0 Add a helper function nvme_log_err_passthru() which allows us to log error for passthru commands by decoding cdw10-cdw15 values of nvme command. Add a new sysfs attr passthru_err_log_enabled that allows user to conditionally enable passthrough command logging for either passthrough Admin commands sent to the controller or passthrough IO commands sent to a namespace. By default, passthrough error logging is disabled. To enable passthrough admin error logging: echo 1 > /sys/class/nvme/nvme0/passthru_err_log_enabled To disable passthrough admin error logging: echo 0 > /sys/class/nvme/nvme0/passthru_err_log_enabled To enable passthrough io error logging: echo 1 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled To disable passthrough io error logging: echo 0 > /sys/class/nvme/nvme0/nvme0n1/passthru_err_log_enabled Signed-off-by: Alan Adamson Signed-off-by: Chaitanya Kulkarni Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 59 +++++++++++++++++++++++++++++++++++---- drivers/nvme/host/nvme.h | 3 +- drivers/nvme/host/sysfs.c | 58 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8b48b7f7871ad..0d124a8ca9c32 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -338,6 +338,30 @@ static void nvme_log_error(struct request *req) nr->status & NVME_SC_DNR ? "DNR " : ""); } +static void nvme_log_err_passthru(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_request *nr = nvme_req(req); + + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s" + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n", + ns ? ns->disk->disk_name : dev_name(nr->ctrl->device), + ns ? nvme_get_opcode_str(nr->cmd->common.opcode) : + nvme_get_admin_opcode_str(nr->cmd->common.opcode), + nr->cmd->common.opcode, + nvme_get_error_status_str(nr->status), + nr->status >> 8 & 7, /* Status Code Type */ + nr->status & 0xff, /* Status Code */ + nr->status & NVME_SC_MORE ? "MORE " : "", + nr->status & NVME_SC_DNR ? "DNR " : "", + nr->cmd->common.cdw10, + nr->cmd->common.cdw11, + nr->cmd->common.cdw12, + nr->cmd->common.cdw13, + nr->cmd->common.cdw14, + nr->cmd->common.cdw14); +} + enum nvme_disposition { COMPLETE, RETRY, @@ -385,8 +409,12 @@ static inline void nvme_end_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) - nvme_log_error(req); + if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) { + if (blk_rq_is_passthrough(req)) + nvme_log_err_passthru(req); + else + nvme_log_error(req); + } nvme_end_req_zoned(req); nvme_trace_bio_complete(req); if (req->cmd_flags & REQ_NVME_MPATH) @@ -679,10 +707,21 @@ static inline void nvme_clear_nvme_request(struct request *req) /* initialize a passthrough request */ void nvme_init_request(struct request *req, struct nvme_command *cmd) { - if (req->q->queuedata) + struct nvme_request *nr = nvme_req(req); + bool logging_enabled; + + if (req->q->queuedata) { + struct nvme_ns *ns = req->q->disk->private_data; + + logging_enabled = ns->passthru_err_log_enabled; req->timeout = NVME_IO_TIMEOUT; - else /* no queuedata implies admin queue */ + } else { /* no queuedata implies admin queue */ + logging_enabled = nr->ctrl->passthru_err_log_enabled; req->timeout = NVME_ADMIN_TIMEOUT; + } + + if (!logging_enabled) + req->rq_flags |= RQF_QUIET; /* passthru commands should let the driver set the SGL flags */ cmd->common.flags &= ~NVME_CMD_SGL_ALL; @@ -691,8 +730,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) if (req->mq_hctx->type == HCTX_TYPE_POLL) req->cmd_flags |= REQ_POLLED; nvme_clear_nvme_request(req); - req->rq_flags |= RQF_QUIET; - memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); + memcpy(nr->cmd, cmd, sizeof(*cmd)); } EXPORT_SYMBOL_GPL(nvme_init_request); @@ -3658,6 +3696,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; + ns->passthru_err_log_enabled = false; if (ctrl->opts && ctrl->opts->data_digest) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); @@ -3721,6 +3760,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) nvme_mpath_add_disk(ns, info->anagrpid); nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name); + /* + * Set ns->disk->device->driver_data to ns so we can access + * ns->logging_enabled in nvme_passthru_err_log_enabled_store() and + * nvme_passthru_err_log_enabled_show(). + */ + dev_set_drvdata(disk_to_dev(ns->disk), ns); + return; out_cleanup_ns_from_list: @@ -4521,6 +4567,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, int ret; WRITE_ONCE(ctrl->state, NVME_CTRL_NEW); + ctrl->passthru_err_log_enabled = false; clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b70b333a0874f..3897334e3950d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -263,6 +263,7 @@ enum nvme_ctrl_flags { struct nvme_ctrl { bool comp_seen; bool identified; + bool passthru_err_log_enabled; enum nvme_ctrl_state state; spinlock_t lock; struct mutex scan_lock; @@ -522,7 +523,7 @@ struct nvme_ns { struct device cdev_device; struct nvme_fault_inject fault_inject; - + bool passthru_err_log_enabled; }; /* NVMe ns supports metadata actions by the controller (generate/strip) */ diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 6b2f06f190de3..d099218e494a8 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -35,6 +35,62 @@ static ssize_t nvme_sysfs_rescan(struct device *dev, } static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan); +static ssize_t nvme_adm_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + return sysfs_emit(buf, + ctrl->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + int err; + bool passthru_err_log_enabled; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + + ctrl->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + +static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *n = dev_get_drvdata(dev); + + return sysfs_emit(buf, n->passthru_err_log_enabled ? "on\n" : "off\n"); +} + +static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ns *ns = dev_get_drvdata(dev); + int err; + bool passthru_err_log_enabled; + + err = kstrtobool(buf, &passthru_err_log_enabled); + if (err) + return -EINVAL; + ns->passthru_err_log_enabled = passthru_err_log_enabled; + + return count; +} + +static struct device_attribute dev_attr_adm_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_adm_passthru_err_log_enabled_show, nvme_adm_passthru_err_log_enabled_store); + +static struct device_attribute dev_attr_io_passthru_err_log_enabled = \ + __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ + nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store); + static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); @@ -208,6 +264,7 @@ static struct attribute *nvme_ns_attrs[] = { &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, #endif + &dev_attr_io_passthru_err_log_enabled.attr, NULL, }; @@ -655,6 +712,7 @@ static struct attribute *nvme_dev_attrs[] = { #ifdef CONFIG_NVME_TCP_TLS &dev_attr_tls_key.attr, #endif + &dev_attr_adm_passthru_err_log_enabled.attr, NULL }; -- GitLab From ae3f4b44641dfff969604735a0dcbf931f383285 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 31 Jan 2024 02:21:49 -0800 Subject: [PATCH 0538/1405] net: sysfs: Fix /sys/class/net/ path The documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 1a02ef76acfa ("net: sysfs: add documentation entries for /sys/class//queues") Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240131102150.728960-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- .../ABI/testing/sysfs-class-net-queues | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 906ff3ca928ac..5bff64d256c20 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class//queues/rx-/rps_cpus +What: /sys/class/net//queues/rx-/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/rx-/rps_flow_cnt +What: /sys/class/net//queues/rx-/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: netdev@vger.kernel.org @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class//queues/tx-/tx_timeout +What: /sys/class/net//queues/tx-/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class//queues/tx-/tx_maxrate +What: /sys/class/net//queues/tx-/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class//queues/tx-/xps_cpus +What: /sys/class/net//queues/tx-/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: netdev@vger.kernel.org @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class//queues/tx-/xps_rxqs +What: /sys/class/net//queues/tx-/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: netdev@vger.kernel.org @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class//queues/tx-/byte_queue_limits/hold_time +What: /sys/class/net//queues/tx-/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class//queues/tx-/byte_queue_limits/inflight +What: /sys/class/net//queues/tx-/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class//queues/tx-/byte_queue_limits/limit +What: /sys/class/net//queues/tx-/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class//queues/tx-/byte_queue_limits/limit_max +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class//queues/tx-/byte_queue_limits/limit_min +What: /sys/class/net//queues/tx-/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: netdev@vger.kernel.org -- GitLab From 7b55984c96ffe9e236eb9c82a2196e0b1f84990d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 29 Jan 2024 14:03:08 +0100 Subject: [PATCH 0539/1405] xen-netback: properly sync TX responses Invoking the make_tx_response() / push_tx_responses() pair with no lock held would be acceptable only if all such invocations happened from the same context (NAPI instance or dealloc thread). Since this isn't the case, and since the interface "spec" also doesn't demand that multicast operations may only be performed with no in-flight transmits, MCAST_{ADD,DEL} processing also needs to acquire the response lock around the invocations. To prevent similar mistakes going forward, "downgrade" the present functions to private helpers of just the two remaining ones using them directly, with no forward declarations anymore. This involves renaming what so far was make_tx_response(), for the new function of that name to serve the new (wrapper) purpose. While there, - constify the txp parameters, - correct xenvif_idx_release()'s status parameter's type, - rename {,_}make_tx_response()'s status parameters for consistency with xenvif_idx_release()'s. Fixes: 210c34dcd8d9 ("xen-netback: add support for multicast control") Cc: stable@vger.kernel.org Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/980c6c3d-e10e-4459-8565-e8fbde122f00@suse.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/netback.c | 84 +++++++++++++++---------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d7503aef599f0..fab361a250d60 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1433,8 +1415,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1444,8 +1453,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1459,32 +1468,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) -- GitLab From e0526ec5360a48ad3ab2e26e802b0532302a7e11 Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Tue, 30 Jan 2024 23:35:51 -0800 Subject: [PATCH 0540/1405] hv_netvsc: Fix race condition between netvsc_probe and netvsc_remove In commit ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel"), napi_disable was getting called for all channels, including all subchannels without confirming if they are enabled or not. This caused hv_netvsc getting hung at napi_disable, when netvsc_probe() has finished running but nvdev->subchan_work has not started yet. netvsc_subchan_work() -> rndis_set_subchannel() has not created the sub-channels and because of that netvsc_sc_open() is not running. netvsc_remove() calls cancel_work_sync(&nvdev->subchan_work), for which netvsc_subchan_work did not run. netif_napi_add() sets the bit NAPI_STATE_SCHED because it ensures NAPI cannot be scheduled. Then netvsc_sc_open() -> napi_enable will clear the NAPIF_STATE_SCHED bit, so it can be scheduled. napi_disable() does the opposite. Now during netvsc_device_remove(), when napi_disable is called for those subchannels, napi_disable gets stuck on infinite msleep. This fix addresses this problem by ensuring that napi_disable() is not getting called for non-enabled NAPI struct. But netif_napi_del() is still necessary for these non-enabled NAPI struct for cleanup purpose. Call trace: [ 654.559417] task:modprobe state:D stack: 0 pid: 2321 ppid: 1091 flags:0x00004002 [ 654.568030] Call Trace: [ 654.571221] [ 654.573790] __schedule+0x2d6/0x960 [ 654.577733] schedule+0x69/0xf0 [ 654.581214] schedule_timeout+0x87/0x140 [ 654.585463] ? __bpf_trace_tick_stop+0x20/0x20 [ 654.590291] msleep+0x2d/0x40 [ 654.593625] napi_disable+0x2b/0x80 [ 654.597437] netvsc_device_remove+0x8a/0x1f0 [hv_netvsc] [ 654.603935] rndis_filter_device_remove+0x194/0x1c0 [hv_netvsc] [ 654.611101] ? do_wait_intr+0xb0/0xb0 [ 654.615753] netvsc_remove+0x7c/0x120 [hv_netvsc] [ 654.621675] vmbus_remove+0x27/0x40 [hv_vmbus] Cc: stable@vger.kernel.org Fixes: ac5047671758 ("hv_netvsc: Disable NAPI before closing the VMBus channel") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/1706686551-28510-1-git-send-email-schakrabarti@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1dafa44155d0e..a6fcbda64ecc6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } -- GitLab From 7b6fb3050d8f5e2b6858eef344e47ac1f5442827 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:44 -0500 Subject: [PATCH 0541/1405] selftests: team: Add missing config options Similar to commit dd2d40acdbb2 ("selftests: bonding: Add more missing config options"), add more networking-specific config options which are needed for team device tests. For testing, I used the minimal config generated by virtme-ng and I added the options in the config file. Afterwards, the team device test passed. Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Reviewed-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-2-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/team/config | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882cc21e..b5e3a3aad4bfb 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y -- GitLab From 8cc063ae1b3dbe416ce62a15d49af4c2314b45fe Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:45 -0500 Subject: [PATCH 0542/1405] selftests: bonding: Check initial state The purpose of the test_LAG_cleanup() function is to check that some hardware addresses are removed from underlying devices after they have been unenslaved. The test function simply checks that those addresses are not present at the end. However, if the addresses were never added to begin with due to some error in device setup, the test function currently passes. This is a false positive since in that situation the test did not actually exercise the intended functionality. Add a check that the expected addresses are indeed present after device setup. This makes the test function more robust. I noticed this problem when running the team/dev_addr_lists.sh test on a system without support for dummy and ipv6: tools/testing/selftests/drivers/net/team# ./dev_addr_lists.sh Error: Unknown device type. Error: Unknown device type. This program is not intended to be run as root. RTNETLINK answers: Operation not supported TEST: team cleanup mode lacp [ OK ] Fixes: bbb774d921e2 ("net: Add tests for bonding and team address list management") Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-3-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/lag_lib.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 2a268b17b61f5..dbdd736a41d39 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -48,6 +48,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" -- GitLab From 9d851dd4dab63e95c1911a2fa847796d1ec5d58d Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:46 -0500 Subject: [PATCH 0543/1405] selftests: net: Remove executable bits from library scripts setup_loopback.sh and net_helper.sh are meant to be sourced from other scripts, not executed directly. Therefore, remove the executable bits from those files' permissions. This change is similar to commit 49078c1b80b6 ("selftests: forwarding: Remove executable bits from lib.sh") Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Suggested-by: Paolo Abeni Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-4-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/net_helper.sh | 0 tools/testing/selftests/net/setup_loopback.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tools/testing/selftests/net/net_helper.sh mode change 100755 => 100644 tools/testing/selftests/net/setup_loopback.sh diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh old mode 100755 new mode 100644 diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh old mode 100755 new mode 100644 -- GitLab From 06efafd8608dac0c3a480539acc66ee41d2fb430 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:47 -0500 Subject: [PATCH 0544/1405] selftests: net: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Move those utility scripts to TEST_FILES. Fixes: 1751eb42ddb5 ("selftests: net: use TEST_PROGS_EXTENDED") Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Fixes: b99ac1841147 ("kselftests/net: add missed setup_loopback.sh/setup_veth.sh to Makefile") Fixes: f5173fe3e13b ("selftests: net: included needed helper in the install targets") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-5-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 48c6f93b81498..211753756bdee 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,9 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh -TEST_PROGS_EXTENDED += net_helper.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -97,6 +95,7 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk -- GitLab From 96cd5ac4c0e6b91b74c8fbfcaa7e5c943dfa4835 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Wed, 31 Jan 2024 09:08:48 -0500 Subject: [PATCH 0545/1405] selftests: forwarding: List helper scripts in TEST_FILES Makefile variable Some scripts are not tests themselves; they contain utility functions used by other tests. According to Documentation/dev-tools/kselftest.rst, such files should be listed in TEST_FILES. Currently they are incorrectly listed in TEST_PROGS_EXTENDED so rename the variable. Fixes: c085dbfb1cfc ("selftests/net/forwarding: define libs as TEST_PROGS_EXTENDED") Suggested-by: Petr Machata Signed-off-by: Benjamin Poirier Link: https://lore.kernel.org/r/20240131140848.360618-6-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 452693514be4b..4de92632f4836 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ -- GitLab From 1939f738c73dfdb8389839bdc9624c765e3326e6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 31 Jan 2024 08:56:05 -0800 Subject: [PATCH 0546/1405] selftests: net: add missing config for NF_TARGET_TTL amt test uses the TTL iptables module: ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \ -d 239.0.0.1 -j TTL --ttl-set 2 Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Link: https://lore.kernel.org/r/20240131165605.4051645-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 98c6bd2228c69..24a7c7bcbbc19 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,6 +33,7 @@ CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m -- GitLab From c15a729c9d45aa142fb01a3afee822ab1f0e62a8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 18:52:29 +0100 Subject: [PATCH 0547/1405] selftests: net: enable some more knobs The rtnetlink tests require additional options currently off by default. Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Fixes: 5e596ee171ba ("selftests: add xfrm state-policy-monitor to rtnetlink.sh") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/9048ca58e49b962f35dba1dfb2beaf3dab3e0411.1706723341.git.pabeni@redhat.com/ Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 24a7c7bcbbc19..3b749addd3640 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -22,6 +22,8 @@ CONFIG_VLAN_8021Q=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y @@ -93,3 +95,4 @@ CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_USER=m -- GitLab From 1389358bb008e7625942846e9f03554319b7fecc Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 1 Feb 2024 16:13:39 +0100 Subject: [PATCH 0548/1405] tracing/timerlat: Move hrtimer_init to timerlat_fd open() Currently, the timerlat's hrtimer is initialized at the first read of timerlat_fd, and destroyed at close(). It works, but it causes an error if the user program open() and close() the file without reading. Here's an example: # echo NO_OSNOISE_WORKLOAD > /sys/kernel/debug/tracing/osnoise/options # echo timerlat > /sys/kernel/debug/tracing/current_tracer # cat < ./timerlat_load.py # !/usr/bin/env python3 timerlat_fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd", 'r') timerlat_fd.close(); EOF # ./taskset -c 0 ./timerlat_load.py BUG: kernel NULL pointer dereference, address: 0000000000000010 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 2673 Comm: python3 Not tainted 6.6.13-200.fc39.x86_64 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-1.fc39 04/01/2014 RIP: 0010:hrtimer_active+0xd/0x50 Code: 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 48 8b 57 30 <8b> 42 10 a8 01 74 09 f3 90 8b 42 10 a8 01 75 f7 80 7f 38 00 75 1d RSP: 0018:ffffb031009b7e10 EFLAGS: 00010286 RAX: 000000000002db00 RBX: ffff9118f786db08 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9117a0e64400 RDI: ffff9118f786db08 RBP: ffff9118f786db80 R08: ffff9117a0ddd420 R09: ffff9117804d4f70 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9118f786db08 R13: ffff91178fdd5e20 R14: ffff9117840978c0 R15: 0000000000000000 FS: 00007f2ffbab1740(0000) GS:ffff9118f7840000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 00000001b402e000 CR4: 0000000000750ee0 PKRU: 55555554 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? srso_alias_return_thunk+0x5/0x7f ? avc_has_extended_perms+0x237/0x520 ? exc_page_fault+0x7f/0x180 ? asm_exc_page_fault+0x26/0x30 ? hrtimer_active+0xd/0x50 hrtimer_cancel+0x15/0x40 timerlat_fd_release+0x48/0xe0 __fput+0xf5/0x290 __x64_sys_close+0x3d/0x80 do_syscall_64+0x60/0x90 ? srso_alias_return_thunk+0x5/0x7f ? __x64_sys_ioctl+0x72/0xd0 ? srso_alias_return_thunk+0x5/0x7f ? syscall_exit_to_user_mode+0x2b/0x40 ? srso_alias_return_thunk+0x5/0x7f ? do_syscall_64+0x6c/0x90 ? srso_alias_return_thunk+0x5/0x7f ? exit_to_user_mode_prepare+0x142/0x1f0 ? srso_alias_return_thunk+0x5/0x7f ? syscall_exit_to_user_mode+0x2b/0x40 ? srso_alias_return_thunk+0x5/0x7f ? do_syscall_64+0x6c/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 RIP: 0033:0x7f2ffb321594 Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 cd 0d 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3c c3 0f 1f 00 55 48 89 e5 48 83 ec 10 89 7d RSP: 002b:00007ffe8d8eef18 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 00007f2ffba4e668 RCX: 00007f2ffb321594 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007ffe8d8eef40 R08: 0000000000000000 R09: 0000000000000000 R10: 55c926e3167eae79 R11: 0000000000000202 R12: 0000000000000003 R13: 00007ffe8d8ef030 R14: 0000000000000000 R15: 00007f2ffba4e668 CR2: 0000000000000010 ---[ end trace 0000000000000000 ]--- Move hrtimer_init to timerlat_fd open() to avoid this problem. Link: https://lore.kernel.org/linux-trace-kernel/7324dd3fc0035658c99b825204a66049389c56e3.1706798888.git.bristot@kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: stable@vger.kernel.org Fixes: e88ed227f639 ("tracing/timerlat: Add user-space interface") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index bd0d01d00fb9d..a8e28f9b9271c 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2444,6 +2444,9 @@ static int timerlat_fd_open(struct inode *inode, struct file *file) tlat = this_cpu_tmr_var(); tlat->count = 0; + hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); + tlat->timer.function = timerlat_irq; + migrate_enable(); return 0; }; @@ -2526,9 +2529,6 @@ timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, tlat->tracing_thread = false; tlat->kthread = current; - hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); - tlat->timer.function = timerlat_irq; - /* Annotate now to drift new period */ tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); -- GitLab From 5a49f996046ba947466bc7461e4b19c4d1daf978 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:47 -0500 Subject: [PATCH 0549/1405] eventfs: Warn if an eventfs_inode is freed without is_freed being set There should never be a case where an evenfs_inode is being freed without is_freed being set. Add a WARN_ON_ONCE() if it ever happens. That would mean there was one too many put_ei()s. Link: https://lore.kernel.org/linux-trace-kernel/20240201161616.843551963@goodmis.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 515fdace1eeaf..ca7daee7c8115 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -73,6 +73,9 @@ enum { static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + + WARN_ON_ONCE(!ei->is_freed); + kfree(ei->entry_attrs); kfree_const(ei->name); kfree_rcu(ei, rcu); @@ -84,6 +87,14 @@ static inline void put_ei(struct eventfs_inode *ei) kref_put(&ei->kref, release_ei); } +static inline void free_ei(struct eventfs_inode *ei) +{ + if (ei) { + ei->is_freed = 1; + put_ei(ei); + } +} + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) @@ -679,7 +690,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - put_ei(ei); + free_ei(ei); ei = NULL; } return ei; @@ -770,7 +781,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - put_ei(ei); + free_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } @@ -801,9 +812,8 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - ei->is_freed = 1; list_del(&ei->list); - put_ei(ei); + free_ei(ei); } /** -- GitLab From 264424dfdd5cbd92bc5b5ddf93944929fc877fac Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:48 -0500 Subject: [PATCH 0550/1405] eventfs: Restructure eventfs_inode structure to be more condensed Some of the eventfs_inode structure has holes in it. Rework the structure to be a bit more condensed, and also remove the no longer used llist field. Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.002321438@goodmis.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/internal.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 1886f1826cd83..beb3dcd0e4342 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -32,40 +32,37 @@ struct eventfs_attr { /* * struct eventfs_inode - hold the properties of the eventfs directories. * @list: link list into the parent directory + * @rcu: Union with @list for freeing + * @children: link list into the child eventfs_inode * @entries: the array of entries representing the files in the directory * @name: the name of the directory to create - * @children: link list into the child eventfs_inode * @events_dir: the dentry of the events directory * @entry_attrs: Saved mode and ownership of the @d_children - * @attr: Saved mode and ownership of eventfs_inode itself * @data: The private data to pass to the callbacks + * @attr: Saved mode and ownership of eventfs_inode itself * @is_freed: Flag set if the eventfs is on its way to be freed * Note if is_freed is set, then dentry is corrupted. + * @is_events: Flag set for only the top level "events" directory * @nr_entries: The number of items in @entries + * @ino: The saved inode number */ struct eventfs_inode { - struct kref kref; - struct list_head list; + union { + struct list_head list; + struct rcu_head rcu; + }; + struct list_head children; const struct eventfs_entry *entries; const char *name; - struct list_head children; struct dentry *events_dir; struct eventfs_attr *entry_attrs; - struct eventfs_attr attr; void *data; + struct eventfs_attr attr; + struct kref kref; unsigned int is_freed:1; unsigned int is_events:1; unsigned int nr_entries:30; unsigned int ino; - /* - * Union - used for deletion - * @llist: for calling dput() if needed after RCU - * @rcu: eventfs_inode to delete in RCU - */ - union { - struct llist_node llist; - struct rcu_head rcu; - }; }; static inline struct tracefs_inode *get_tracefs(const struct inode *inode) -- GitLab From 12d823b31fadf47c8f36ecada7abac5f903cac33 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:49 -0500 Subject: [PATCH 0551/1405] eventfs: Remove fsnotify*() functions from lookup() The dentries and inodes are created when referenced in the lookup code. There's no reason to call fsnotify_*() functions when they are created by a reference. It doesn't make any sense. Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.166973329@goodmis.org Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Fixes: a376007917776 ("eventfs: Implement functions to create files and dirs when accessed"); Suggested-by: Al Viro Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index ca7daee7c8115..9e031e5a2713b 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -366,7 +366,6 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei, dentry->d_fsdata = get_ei(parent_ei); d_add(dentry, inode); - fsnotify_create(dentry->d_parent->d_inode, dentry); return NULL; }; @@ -408,7 +407,6 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, inc_nlink(inode); d_add(dentry, inode); inc_nlink(dentry->d_parent->d_inode); - fsnotify_mkdir(dentry->d_parent->d_inode, dentry); return NULL; } -- GitLab From ca185770db914869ff9fe773bac5e0e5e4165b83 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 1 Feb 2024 10:34:50 -0500 Subject: [PATCH 0552/1405] eventfs: Keep all directory links at 1 The directory link count in eventfs was somewhat bogus. It was only being updated when a directory child was being looked up and not on creation. One solution would be to update in get_attr() the link count by iterating the ei->children list and then adding 2. But that could slow down simple stat() calls, especially if it's done on all directories in eventfs. Another solution would be to add a parent pointer to the eventfs_inode and keep track of the number of sub directories it has on creation. But this adds overhead for something not really worthwhile. The solution decided upon is to keep all directory links in eventfs as 1. This tells user space not to rely on the hard links of directories. Which in this case it shouldn't. Link: https://lore.kernel.org/linux-trace-kernel/20240201002719.GS2087318@ZenIV/ Link: https://lore.kernel.org/linux-trace-kernel/20240201161617.339968298@goodmis.org Cc: stable@vger.kernel.org Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Christian Brauner Cc: Al Viro Cc: Ajay Kaher Fixes: c1504e510238 ("eventfs: Implement eventfs dir creation functions") Suggested-by: Al Viro Signed-off-by: Steven Rostedt (Google) --- fs/tracefs/event_inode.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 9e031e5a2713b..110e8a2721890 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -404,9 +404,7 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, dentry->d_fsdata = get_ei(ei); - inc_nlink(inode); d_add(dentry, inode); - inc_nlink(dentry->d_parent->d_inode); return NULL; } @@ -769,9 +767,17 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry dentry->d_fsdata = get_ei(ei); - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); + /* + * Keep all eventfs directories with i_nlink == 1. + * Due to the dynamic nature of the dentry creations and not + * wanting to add a pointer to the parent eventfs_inode in the + * eventfs_inode structure, keeping the i_nlink in sync with the + * number of directories would cause too much complexity for + * something not worth much. Keeping directory links at 1 + * tells userspace not to trust the link number. + */ d_instantiate(dentry, inode); + /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); tracefs_end_creating(dentry); -- GitLab From e440c5f2e3e6893aeb39bbba6dd181207840a795 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Jan 2024 15:11:17 +0100 Subject: [PATCH 0553/1405] KVM: selftests: Generalize check_clocksource() from kvm_clock_test Several existing x86 selftests need to check that the underlying system clocksource is TSC or based on TSC but every test implements its own check. As a first step towards unification, extract check_clocksource() from kvm_clock_test and split it into two functions: arch-neutral 'sys_get_cur_clocksource()' and x86-specific 'sys_clocksource_is_tsc()'. Fix a couple of pre-existing issues in kvm_clock_test: memory leakage in check_clocksource() and using TEST_ASSERT() instead of TEST_REQUIRE(). The change also makes the test fail when system clocksource can't be read from sysfs. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240109141121.1619463-2-vkuznets@redhat.com [sean: eliminate if-elif pattern just to set a bool true] Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/test_util.h | 2 + .../selftests/kvm/include/x86_64/processor.h | 2 + tools/testing/selftests/kvm/lib/test_util.c | 25 ++++++++++++ .../selftests/kvm/lib/x86_64/processor.c | 10 +++++ .../selftests/kvm/x86_64/kvm_clock_test.c | 38 +------------------ 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 71a41fa924b7d..50a5e31ba8da1 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -195,4 +195,6 @@ __printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...); char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); +char *sys_get_cur_clocksource(void); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a84863503fcb4..01eec72e0d3e3 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1271,4 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +bool sys_clocksource_is_tsc(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 5d7f28b02d73b..5a8f8becb1298 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -392,3 +392,28 @@ char *strdup_printf(const char *fmt, ...) return str; } + +#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" + +char *sys_get_cur_clocksource(void) +{ + char *clk_name; + struct stat st; + FILE *fp; + + fp = fopen(CLOCKSOURCE_PATH, "r"); + TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno); + + TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d", + errno); + + clk_name = malloc(st.st_size); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); + + TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d", + ferror(fp)); + + fclose(fp); + + return clk_name; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 4bc52948447d8..e6964ff2a37da 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1299,3 +1299,13 @@ void kvm_selftest_arch_init(void) host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); } + +bool sys_clocksource_is_tsc(void) +{ + char *clk_name = sys_get_cur_clocksource(); + bool ret = !strcmp(clk_name, "tsc\n"); + + free(clk_name); + + return ret; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 3e0b7d51abdaa..6fcc1a4335875 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -132,42 +132,6 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } -#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" - -static void check_clocksource(void) -{ - char *clk_name; - struct stat st; - FILE *fp; - - fp = fopen(CLOCKSOURCE_PATH, "r"); - if (!fp) { - pr_info("failed to open clocksource file: %d; assuming TSC.\n", - errno); - return; - } - - if (fstat(fileno(fp), &st)) { - pr_info("failed to stat clocksource file: %d; assuming TSC.\n", - errno); - goto out; - } - - clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); - - if (!fgets(clk_name, st.st_size, fp)) { - pr_info("failed to read clocksource file: %d; assuming TSC.\n", - ferror(fp)); - goto out; - } - - TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size), - "clocksource not supported: %s", clk_name); -out: - fclose(fp); -} - int main(void) { struct kvm_vcpu *vcpu; @@ -179,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - check_clocksource(); + TEST_REQUIRE(sys_clocksource_is_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); -- GitLab From 410cb01ead5bcec500c0654f361d620553f930aa Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Jan 2024 15:11:18 +0100 Subject: [PATCH 0554/1405] KVM: selftests: Use generic sys_clocksource_is_tsc() in vmx_nested_tsc_scaling_test Despite its name, system_has_stable_tsc() just checks that system clocksource is 'tsc'; this can now be done with generic sys_clocksource_is_tsc(). No functional change intended. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240109141121.1619463-3-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- .../kvm/x86_64/vmx_nested_tsc_scaling_test.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index e710b6e7fb384..93b0a850a2400 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -116,23 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static bool system_has_stable_tsc(void) -{ - bool tsc_is_stable; - FILE *fp; - char buf[4]; - - fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); - if (fp == NULL) - return false; - - tsc_is_stable = fgets(buf, sizeof(buf), fp) && - !strncmp(buf, "tsc", sizeof(buf)); - - fclose(fp); - return tsc_is_stable; -} - int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -148,7 +131,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(system_has_stable_tsc()); + TEST_REQUIRE(sys_clocksource_is_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. -- GitLab From 09951bf2cbb3a7893f76d1364b0ae6e3007ff1de Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Jan 2024 15:11:19 +0100 Subject: [PATCH 0555/1405] KVM: selftests: Run clocksource dependent tests with hyperv_clocksource_tsc_page too KVM's 'gtod_is_based_on_tsc()' recognizes two clocksources: 'tsc' and 'hyperv_clocksource_tsc_page' and enables kvmclock in 'masterclock' mode when either is in use. Transform 'sys_clocksource_is_tsc()' into 'sys_clocksource_is_based_on_tsc()' to support the later. This affects two tests: kvm_clock_test and vmx_nested_tsc_scaling_test, both seem to work well when system clocksource is 'hyperv_clocksource_tsc_page'. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240109141121.1619463-4-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 5 +++-- tools/testing/selftests/kvm/x86_64/kvm_clock_test.c | 2 +- .../selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 01eec72e0d3e3..5bca8c947c825 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1271,6 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) -bool sys_clocksource_is_tsc(void); +bool sys_clocksource_is_based_on_tsc(void); #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index e6964ff2a37da..f639b3e062e3a 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1300,10 +1300,11 @@ void kvm_selftest_arch_init(void) host_cpu_is_amd = this_cpu_is_amd(); } -bool sys_clocksource_is_tsc(void) +bool sys_clocksource_is_based_on_tsc(void) { char *clk_name = sys_get_cur_clocksource(); - bool ret = !strcmp(clk_name, "tsc\n"); + bool ret = !strcmp(clk_name, "tsc\n") || + !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); free(clk_name); diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 6fcc1a4335875..5bc12222d87af 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -143,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - TEST_REQUIRE(sys_clocksource_is_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index 93b0a850a2400..1759fa5cb3f29 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -131,7 +131,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(sys_clocksource_is_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. -- GitLab From b6831a108be1206cd9f0e7905b48677b4147d5f9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Jan 2024 15:11:20 +0100 Subject: [PATCH 0556/1405] KVM: selftests: Make hyperv_clock require TSC based system clocksource KVM sets up Hyper-V TSC page clocksource for its guests when system clocksource is 'based on TSC' (see gtod_is_based_on_tsc()), running hyperv_clock with any other clocksource leads to imminent failure. Add the missing requirement to make the test skip gracefully. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240109141121.1619463-5-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index 65690d916db7e..e058bc676cd69 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -212,6 +212,7 @@ int main(void) int stage; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); -- GitLab From 9e62797fd7e8eef9c8a3f7b54b57fbc9caf6a20a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Jan 2024 15:11:21 +0100 Subject: [PATCH 0557/1405] KVM: x86: Make gtod_is_based_on_tsc() return 'bool' gtod_is_based_on_tsc() is boolean in nature, i.e. it returns '1' for good clocksources and '0' otherwise. Moreover, its result is used raw by kvm_get_time_and_clockread()/kvm_get_walltime_and_clockread() which are 'bool'. No functional change intended. Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20240109141121.1619463-6-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c0802057..aa27aec10860a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2507,7 +2507,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) } #ifdef CONFIG_X86_64 -static inline int gtod_is_based_on_tsc(int mode) +static inline bool gtod_is_based_on_tsc(int mode) { return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK; } -- GitLab From b6c620dc43ccb4e802894e54b651cf81495e9598 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 Jan 2024 22:49:46 +0100 Subject: [PATCH 0558/1405] mptcp: fix data re-injection from stale subflow When the MPTCP PM detects that a subflow is stale, all the packet scheduler must re-inject all the mptcp-level unacked data. To avoid acquiring unneeded locks, it first try to check if any unacked data is present at all in the RTX queue, but such check is currently broken, as it uses TCP-specific helper on an MPTCP socket. Funnily enough fuzzers and static checkers are happy, as the accessed memory still belongs to the mptcp_sock struct, and even from a functional perspective the recovery completed successfully, as the short-cut test always failed. A recent unrelated TCP change - commit d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") - exposed the issue, as the tcp field reorganization makes the mptcp code always skip the re-inection. Fix the issue dropping the bogus call: we are on a slow path, the early optimization proved once again to be evil. Fixes: 1e1d9d6f119c ("mptcp: handle pending data on closed subflow") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/468 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-1-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ed4709a75096..028e8b473626f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2314,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(msk)) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue -- GitLab From 3645c844902bd4e173d6704fc2a37e8746904d67 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:47 +0100 Subject: [PATCH 0559/1405] selftests: mptcp: add missing kconfig for NF Filter Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table. It is then required to have IP_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae8..2a00bf4acdfa8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,6 +22,7 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_ACT_CSUM=m -- GitLab From 8c86fad2cecdc6bf7283ecd298b4d0555bd8b8aa Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:48 +0100 Subject: [PATCH 0560/1405] selftests: mptcp: add missing kconfig for NF Filter in v6 Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Filter table for IPv6. It is then required to have IP6_NF_FILTER KConfig. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-3-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2a00bf4acdfa8..26fe466f803dd 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -25,6 +25,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y -- GitLab From 2d41f10fa497182df9012d3e95d9cea24eb42e61 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:49 +0100 Subject: [PATCH 0561/1405] selftests: mptcp: add missing kconfig for NF Mangle Since the commit mentioned below, 'mptcp_join' selftests is using IPTables to add rules to the Mangle table, only in IPv4. This KConfig is usually enabled by default in many defconfig, but we recently noticed that some CI were running our selftests without them enabled. Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-4-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 26fe466f803dd..4f80014cae494 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -23,6 +23,7 @@ CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m -- GitLab From 4d4dfb2019d7010efb65926d9d1c1793f9a367c6 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:50 +0100 Subject: [PATCH 0562/1405] selftests: mptcp: increase timeout to 30 min On very slow environments -- e.g. when QEmu is used without KVM --, mptcp_join.sh selftest can take a bit more than 20 minutes. Bump the default timeout by 50% as it seems normal to take that long on some environments. When a debug kernel config is used, this selftest will take even longer, but that's certainly not a common test env to consider for the timeout. The Fixes tag that has been picked here is there simply to help having this patch backported to older stable versions. It is difficult to point to the exact commit that made some env reaching the timeout from time to time. Fixes: d17b968b9876 ("selftests: mptcp: increase timeout to 20 minutes") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-5-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db6..abc5648b59abd 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 -- GitLab From 5e2f3c65af47e527ccac54060cf909e3306652ff Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:51 +0100 Subject: [PATCH 0563/1405] selftests: mptcp: decrease BW in simult flows When running the simult_flow selftest in slow environments -- e.g. QEmu without KVM support --, the results can be unstable. This selftest checks if the aggregated bandwidth is (almost) fully used as expected. To help improving the stability while still keeping the same validation in place, the BW and the delay are reduced to lower the pressure on the CPU. Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-6-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ae8ad5d6fb9da..0cc964e6f2c17 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -284,12 +284,12 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" mptcp_lib_result_print_all_tap exit $ret -- GitLab From de46d138e7735eded9756906747fd3a8c3a42225 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:52 +0100 Subject: [PATCH 0564/1405] selftests: mptcp: allow changing subtests prefix If a CI executes the same selftest multiple times with different options, all results from the same subtests will have the same title, which confuse the CI. With the same title printed in TAP, the tests are considered as the same ones. Now, it is possible to override this prefix by using MPTCP_LIB_KSFT_TEST env var, and have a different title. While at it, use 'basename' to remove the suffix as well instead of using an extra 'sed'. Fixes: c4192967e62f ("selftests: mptcp: lib: format subtests results in TAP") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-7-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 022262a2cfe0e..3a2abae5993e2 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -6,7 +6,7 @@ readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 # shellcheck disable=SC2155 # declare and assign separately -readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g') +readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() -- GitLab From 31ee4ad86afd6ed6f4bb1b38c43011216080c42a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:53 +0100 Subject: [PATCH 0565/1405] selftests: mptcp: join: stop transfer when check is done (part 1) Since the "Fixes" commit mentioned below, "userspace pm" subtests of mptcp_join selftests introduced in v6.5 are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds in slow environments. Note that old versions will need commit bdbef0a6ff10 ("selftests: mptcp: add mptcp_lib_kill_wait") as well to get 'mptcp_lib_kill_wait()' helper. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org # 6.5.x: bdbef0a6ff10: selftests: mptcp: add mptcp_lib_kill_wait Cc: stable@vger.kernel.org # 6.5.x Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-8-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3a5b630261910..85bcc95f4ede6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3453,7 +3453,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create destroy subflow @@ -3475,7 +3475,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create id 0 subflow -- GitLab From 04b57c9e096a9479fe0ad31e3956e336fa589cb2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 31 Jan 2024 22:49:54 +0100 Subject: [PATCH 0566/1405] selftests: mptcp: join: stop transfer when check is done (part 2) Since the "Fixes" commits mentioned below, the newly added "userspace pm" subtests of mptcp_join selftests are launching the whole transfer in the background, do the required checks, then wait for the end of transfer. There is no need to wait longer, especially because the checks at the end of the transfer are ignored (which is fine). This saves quite a few seconds on slow environments. While at it, use 'mptcp_lib_kill_wait()' helper everywhere, instead of on a specific one with 'kill_tests_wait()'. Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Cc: stable@vger.kernel.org Reviewed-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240131-upstream-net-20240131-mptcp-ci-issues-v1-9-4c1c11e571ff@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 85bcc95f4ede6..c07386e21e0a4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -643,13 +643,6 @@ kill_events_pids() mptcp_lib_kill_wait $evts_ns2_pid } -kill_tests_wait() -{ - #shellcheck disable=SC2046 - kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) - wait -} - pm_nl_set_limits() { local ns=$1 @@ -3494,7 +3487,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm remove initial subflow @@ -3518,7 +3511,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3544,7 +3537,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi } @@ -3558,7 +3551,8 @@ endpoint_tests() pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns1 pm_nl_check_endpoint "creation" \ @@ -3573,7 +3567,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi if reset "delete and re-add" && @@ -3582,7 +3576,8 @@ endpoint_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns2 chk_subflow_nr "before delete" 2 @@ -3597,7 +3592,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi } -- GitLab From f0588b157f48b9c6277a75c9f14650e86d969e03 Mon Sep 17 00:00:00 2001 From: Pavan Kumar Linga Date: Wed, 31 Jan 2024 14:22:40 -0800 Subject: [PATCH 0567/1405] idpf: avoid compiler padding in virtchnl2_ptype struct In the arm random config file, kconfig option 'CONFIG_AEABI' is disabled which results in adding the compiler flag '-mabi=apcs-gnu'. This causes the compiler to add padding in virtchnl2_ptype structure to align it to 8 bytes, resulting in the following size check failure: include/linux/build_bug.h:78:41: error: static assertion failed: "(6) == sizeof(struct virtchnl2_ptype)" 78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) | ^~~~~~~~~~~~~~ include/linux/build_bug.h:77:34: note: in expansion of macro '__static_assert' 77 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) | ^~~~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:26:9: note: in expansion of macro 'static_assert' 26 | static_assert((n) == sizeof(struct X)) | ^~~~~~~~~~~~~ drivers/net/ethernet/intel/idpf/virtchnl2.h:982:1: note: in expansion of macro 'VIRTCHNL2_CHECK_STRUCT_LEN' 982 | VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid the compiler padding by using "__packed" structure attribute for the virtchnl2_ptype struct. Also align the structure by using "__aligned(2)" for better code optimization. Fixes: 0d7502a9b4a7 ("virtchnl: add virtchnl version 2 ops") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312220250.ufEm8doQ-lkp@intel.com Reviewed-by: Przemek Kitszel Reviewed-by: Paul Menzel Reviewed-by: Simon Horman Signed-off-by: Pavan Kumar Linga Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240131222241.2087516-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/idpf/virtchnl2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 8dc837889723c..4a3c4454d25ab 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -978,7 +978,7 @@ struct virtchnl2_ptype { u8 proto_id_count; __le16 pad; __le16 proto_id[]; -}; +} __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); /** -- GitLab From 069a6ed2992df8eaae90d69d7770de8d545327d9 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Thu, 1 Feb 2024 11:38:53 +0000 Subject: [PATCH 0568/1405] doc/netlink/specs: Add missing attr in rt_link spec IFLA_DPLL_PIN was added to rt_link messages but not to the spec, which breaks ynl. Add the missing definitions to the rt_link ynl spec. Fixes: 5f1842692880 ("netdev: expose DPLL pin handle for netdevice") Signed-off-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201113853.37432-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 1ad01d52a8638..8e4d19adee8cd 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -942,6 +942,10 @@ attribute-sets: - name: gro-ipv4-max-size type: u32 + - + name: dpll-pin + type: nest + nested-attributes: link-dpll-pin-attrs - name: af-spec-attrs attributes: @@ -1627,6 +1631,12 @@ attribute-sets: - name: used type: u8 + - + name: link-dpll-pin-attrs + attributes: + - + name: id + type: u32 sub-messages: - -- GitLab From f0377ff97509f5a4921993d5d61da000361bd884 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 18 Jan 2024 12:48:54 +0100 Subject: [PATCH 0569/1405] nvme-host: fix the updating of the firmware version The original code didn't update the firmware version if the "next slot" of the AFI register isn't zero or if the "current slot" field is zero; in those cases it assumed that a reset was needed. However, the NVMe specification doesn't exclude the possibility that the "next slot" value is equal to the "current slot" value, meaning that the same firmware slot will be activated after performing a controller level reset; in this case a reset is clearly not necessary and we can safely update the firmware version. Modify the code so the kernel will report that a Controller Level Reset is needed only in the following cases: 1) If the "current slot" field is zero. This is invalid and means that something is wrong, a reset is needed. or 2) if the "next slot" field isn't zero AND it's not equal to the "current slot" value. This means that at the next reset a different firmware slot will be activated. Fixes: 983a338b96c8 ("nvme: update firmware version after commit") Signed-off-by: Maurizio Lombardi Reviewed-by: Daniel Wagner Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0d124a8ca9c32..975245527c1fc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4191,6 +4191,7 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) { struct nvme_fw_slot_info_log *log; + u8 next_fw_slot, cur_fw_slot; log = kmalloc(sizeof(*log), GFP_KERNEL); if (!log) @@ -4202,13 +4203,15 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) goto out_free_log; } - if (log->afi & 0x70 || !(log->afi & 0x7)) { + cur_fw_slot = log->afi & 0x7; + next_fw_slot = (log->afi & 0x70) >> 4; + if (!cur_fw_slot || (next_fw_slot && (cur_fw_slot != next_fw_slot))) { dev_info(ctrl->device, "Firmware is activated after next Controller Level Reset\n"); goto out_free_log; } - memcpy(ctrl->subsys->firmware_rev, &log->frs[(log->afi & 0x7) - 1], + memcpy(ctrl->subsys->firmware_rev, &log->frs[cur_fw_slot - 1], sizeof(ctrl->subsys->firmware_rev)); out_free_log: -- GitLab From f3c89983cb4fc00be64eb0d5cbcfcdf2cacb965e Mon Sep 17 00:00:00 2001 From: Hongyu Jin Date: Tue, 30 Jan 2024 15:26:34 -0500 Subject: [PATCH 0570/1405] block: Fix where bio IO priority gets set Commit 82b74cac2849 ("blk-ioprio: Convert from rqos policy to direct call") pushed setting bio I/O priority down into blk_mq_submit_bio() -- which is too low within block core's submit_bio() because it skips setting I/O priority for block drivers that implement fops->submit_bio() (e.g. DM, MD, etc). Fix this by moving bio_set_ioprio() up from blk-mq.c to blk-core.c and call it from submit_bio(). This ensures all block drivers call bio_set_ioprio() during initial bio submission. Fixes: a78418e6a04c ("block: Always initialize bio IO priority on submit") Co-developed-by: Yibin Ding Signed-off-by: Yibin Ding Signed-off-by: Hongyu Jin Reviewed-by: Eric Biggers Reviewed-by: Mikulas Patocka [snitzer: revised commit header] Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240130202638.62600-2-snitzer@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++++++++++ block/blk-mq.c | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 11342af420d0c..de771093b5268 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -49,6 +49,7 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" +#include "blk-ioprio.h" struct dentry *blk_debugfs_root; @@ -833,6 +834,14 @@ void submit_bio_noacct(struct bio *bio) } EXPORT_SYMBOL(submit_bio_noacct); +static void bio_set_ioprio(struct bio *bio) +{ + /* Nobody set ioprio so far? Initialize it based on task's nice value */ + if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) + bio->bi_ioprio = get_current_ioprio(); + blkcg_set_ioprio(bio); +} + /** * submit_bio - submit a bio to the block device layer for I/O * @bio: The &struct bio which describes the I/O @@ -855,6 +864,7 @@ void submit_bio(struct bio *bio) count_vm_events(PGPGOUT, bio_sectors(bio)); } + bio_set_ioprio(bio); submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); diff --git a/block/blk-mq.c b/block/blk-mq.c index aa87fcfda1ecf..2dc01551e27c7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -40,7 +40,6 @@ #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" -#include "blk-ioprio.h" static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); @@ -2944,14 +2943,6 @@ static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, return true; } -static void bio_set_ioprio(struct bio *bio) -{ - /* Nobody set ioprio so far? Initialize it based on task's nice value */ - if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) - bio->bi_ioprio = get_current_ioprio(); - blkcg_set_ioprio(bio); -} - /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2976,7 +2967,6 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - bio_set_ioprio(bio); if (plug) { rq = rq_list_peek(&plug->cached_rq); -- GitLab From e77e15fa5eb1c830597c5ca53ea7af973bae2f78 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:26 +0000 Subject: [PATCH 0571/1405] cifs: avoid redundant calls to disable multichannel When the server reports query network interface info call as unsupported following a tree connect, it means that multichannel is unsupported, even if the server capabilities report otherwise. When this happens, cifs_chan_skip_or_disable is called to disable multichannel on the client. However, we only need to call this when multichannel is currently setup. Fixes: f591062bdbf4 ("cifs: handle servers that still advertise multichannel after disabling") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 6db54c6ef5719..e257b10bc8202 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -419,7 +419,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, rc = SMB3_request_interfaces(xid, tcon, false); free_xid(xid); - if (rc == -EOPNOTSUPP) { + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { /* * some servers like Azure SMB server do not advertise * that multichannel has been disabled with server -- GitLab From 88675b22d34e6e815ad4bde09c590ccb2d50c59d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:28 +0000 Subject: [PATCH 0572/1405] cifs: do not search for channel if server is terminating In order to scale down the channels, the following sequence of operations happen: 1. server struct is marked for terminate 2. the channel is deallocated in the ses->chans array 3. at a later point the cifsd thread actually terminates the server Between 2 and 3, there can be calls to find the channel for a server struct. When that happens, there can be an ugly warning that's logged. But this is expected. So this change does two things: 1. in cifs_ses_get_chan_index, if server->terminate is set, return 2. always make sure server->terminate is set with chan_lock held Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 4 ++++ fs/smb/client/smb2pdu.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index cde81042bebda..3d2548c35c9d5 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -75,6 +75,10 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, { unsigned int i; + /* if the channel is waiting for termination */ + if (server->terminate) + return CIFS_INVAL_CHAN_INDEX; + for (i = 0; i < ses->chan_count; i++) { if (ses->chans[i].server == server) return i; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index e257b10bc8202..c58fa44dd6b06 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -178,6 +178,7 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, } ses->chans[chan_index].server = NULL; + server->terminate = true; spin_unlock(&ses->chan_lock); /* @@ -188,7 +189,6 @@ cifs_chan_skip_or_disable(struct cifs_ses *ses, */ cifs_put_tcp_session(server, from_reconnect); - server->terminate = true; cifs_signal_cifsd_for_reconnect(server, false); /* mark primary server as needing reconnect */ -- GitLab From 6aac002bcfd554aff6d3ebb55e1660d078d70ab0 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 1 Feb 2024 11:15:29 +0000 Subject: [PATCH 0573/1405] cifs: failure to add channel on iface should bump up weight After the interface selection policy change to do a weighted round robin, each iface maintains a weight_fulfilled. When the weight_fulfilled reaches the total weight for the iface, we know that the weights can be reset and ifaces can be allocated from scratch again. During channel allocation failures on a particular channel, weight_fulfilled is not incremented. If a few interfaces are inactive, we could end up in a situation where the active interfaces are all allocated for the total_weight, and inactive ones are all that remain. This can cause a situation where no more channels can be allocated further. This change fixes it by increasing weight_fulfilled, even when channel allocation failure happens. This could mean that if there are temporary failures in channel allocation, the iface weights may not strictly be adhered to. But that's still okay. Fixes: a6d8fb54a515 ("cifs: distribute channels across interfaces based on speed") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 3d2548c35c9d5..ed4bd88dd528a 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -273,6 +273,8 @@ int cifs_try_adding_channels(struct cifs_ses *ses) &iface->sockaddr, rc); kref_put(&iface->refcount, release_iface); + /* failure to add chan should increase weight */ + iface->weight_fulfilled++; continue; } -- GitLab From 11d4d1dba3315f73d2d1d386f5bf4811a8241d45 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 29 Jan 2024 21:04:44 -0300 Subject: [PATCH 0574/1405] smb: client: increase number of PDUs allowed in a compound request With the introduction of SMB2_OP_QUERY_WSL_EA, the client may now send 5 commands in a single compound request in order to query xattrs from potential WSL reparse points, which should be fine as we currently allow up to 5 PDUs in a single compound request. However, if encryption is enabled (e.g. 'seal' mount option) or enforced by the server, current MAX_COMPOUND(5) won't be enough as we require an extra PDU for the transform header. Fix this by increasing MAX_COMPOUND to 7 and, while we're at it, add an WARN_ON_ONCE() and return -EIO instead of -ENOMEM in case we attempt to send a compound request that couldn't include the extra transform header. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 +- fs/smb/client/transport.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 9093c507042fa..c86a72c9d9ecd 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -87,7 +87,7 @@ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ -#define MAX_COMPOUND 5 +#define MAX_COMPOUND 7 /* * Default number of credits to keep available for SMB3. diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index e00278fcfa4fa..994d701934329 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -435,8 +435,8 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (!(flags & CIFS_TRANSFORM_REQ)) return __smb_send_rqst(server, num_rqst, rqst); - if (num_rqst > MAX_COMPOUND - 1) - return -ENOMEM; + if (WARN_ON_ONCE(num_rqst > MAX_COMPOUND - 1)) + return -EIO; if (!server->ops->init_transform_rq) { cifs_server_dbg(VFS, "Encryption requested but transform callback is missing\n"); -- GitLab From 1458eb2c9d88ad4b35eb6d6a4aa1d43d8fbf7f62 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 17 Jan 2024 20:57:40 +0100 Subject: [PATCH 0575/1405] riscv: Fix set_huge_pte_at() for NAPOT mapping As stated by the privileged specification, we must clear a NAPOT mapping and emit a sfence.vma before setting a new translation. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240117195741.1926459-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 42 +++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 431596c0e20e0..865ac5c7d159e 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -177,13 +177,36 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) return entry; } +static void clear_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pgsize, + unsigned long ncontig) +{ + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + unsigned long i, saddr = addr; + + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) + ptep_get_and_clear(mm, addr, ptep); + + flush_tlb_range(&vma, saddr, addr); +} + +/* + * When dealing with NAPOT mappings, the privileged specification indicates that + * "if an update needs to be made, the OS generally should first mark all of the + * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions + * within the range, [...] then update the PTE(s), as described in Section + * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by + * arm64. + */ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz) { - unsigned long hugepage_shift; + unsigned long hugepage_shift, pgsize; int i, pte_num; if (sz >= PGDIR_SIZE) @@ -198,7 +221,22 @@ void set_huge_pte_at(struct mm_struct *mm, hugepage_shift = PAGE_SHIFT; pte_num = sz >> hugepage_shift; - for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift)) + pgsize = 1 << hugepage_shift; + + if (!pte_present(pte)) { + for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) + set_ptes(mm, addr, ptep, pte, 1); + return; + } + + if (!pte_napot(pte)) { + set_ptes(mm, addr, ptep, pte, 1); + return; + } + + clear_flush(mm, addr, ptep, pgsize, pte_num); + + for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) set_pte_at(mm, addr, ptep, pte); } -- GitLab From a179a4bfb694f80f2709a1d0398469e787acb974 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 17 Jan 2024 20:57:41 +0100 Subject: [PATCH 0576/1405] riscv: Fix hugetlb_mask_last_page() when NAPOT is enabled When NAPOT is enabled, a new hugepage size is available and then we need to make hugetlb_mask_last_page() aware of that. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240117195741.1926459-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 865ac5c7d159e..87406b26c3dad 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -125,6 +125,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return pte; } +unsigned long hugetlb_mask_last_page(struct hstate *h) +{ + unsigned long hp_size = huge_page_size(h); + + switch (hp_size) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + return P4D_SIZE - PUD_SIZE; +#endif + case PMD_SIZE: + return PUD_SIZE - PMD_SIZE; + case napot_cont_size(NAPOT_CONT64KB_ORDER): + return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER); + default: + break; + } + + return 0UL; +} + static pte_t get_clear_contig(struct mm_struct *mm, unsigned long addr, pte_t *ptep, -- GitLab From 021533194476035883300d60fbb3136426ac8ea5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Feb 2024 14:57:17 -0800 Subject: [PATCH 0577/1405] Kconfig: Disable -Wstringop-overflow for GCC globally It turns out it was never just gcc-11 that was broken. Apparently it just happens to work on x86-64 with other gcc versions. On arm64, I see warnings with gcc version 13.2.1, and the kernel test robot reports the same problem on s390 with gcc 13.2.0. Admittedly it seems to be just the new Xe drm driver, but this is keeping me from doing my normal arm64 build testing. So it gets reverted until somebody figures out what causes the problem (and why it doesn't show on x86-64, which is what makes me suspect it was never just about gcc-11, and more about just random happenstance). This also changes the Kconfig naming a bit - just make the "disable this for GCC" conditional be one simple Kconfig entry, and we can put the gcc version dependencies in that entry once we figure out what the correct rules are. The version dependency _may_ still end up being "gcc version larger than 11" if the issue is purely in the Xe driver, but even if that ends up the case, let's make that all part of the "GCC_NO_STRINGOP_OVERFLOW" logic. For now, we just disable it for all gcc versions while the exact cause is unknown. Link: https://lore.kernel.org/all/202401161031.hjGJHMiJ-lkp@intel.com/T/ Cc: Gustavo A. R. Silva Cc: Kees Cook Signed-off-by: Linus Torvalds --- init/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 8d4e836e1b6b1..deda3d14135bb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -876,13 +876,13 @@ config CC_NO_ARRAY_BOUNDS bool default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS -# Currently, disable -Wstringop-overflow for GCC 11, globally. -config GCC11_NO_STRINGOP_OVERFLOW +# Currently, disable -Wstringop-overflow for GCC globally. +config GCC_NO_STRINGOP_OVERFLOW def_bool y config CC_NO_STRINGOP_OVERFLOW bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW + default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW config CC_STRINGOP_OVERFLOW bool -- GitLab From 03facb39d6c6433a78d0f79c7a146b1e6a61943e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 31 Jan 2024 07:08:54 -0800 Subject: [PATCH 0578/1405] drm/msm/gem: Fix double resv lock aquire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 79e2cf2e7a19 ("drm/gem: Take reservation lock for vmap/vunmap operations"), the resv lock is already held in the prime vmap path, so don't try to grab it again. v2: This applies to vunmap path as well v3: Fix fixes commit Fixes: 79e2cf2e7a19 ("drm/gem: Take reservation lock for vmap/vunmap operations") Signed-off-by: Rob Clark Acked-by: Christian König Patchwork: https://patchwork.freedesktop.org/patch/576642/ --- drivers/gpu/drm/msm/msm_gem_prime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 5f68e31a3e4e1..0915f3b68752e 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) { void *vaddr; - vaddr = msm_gem_get_vaddr(obj); + vaddr = msm_gem_get_vaddr_locked(obj); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); iosys_map_set_vaddr(map, vaddr); @@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { - msm_gem_put_vaddr(obj); + msm_gem_put_vaddr_locked(obj); } struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, -- GitLab From 6a0dbcd20ef252ebf98af94186a2e53da7167bed Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 9 Jan 2024 22:41:08 +0200 Subject: [PATCH 0579/1405] drm/msm/a6xx: set highest_bank_bit to 13 for a610 During the testing of Gnome on Qualcomm Robotics platform screen corruption has been observed. Lowering GPU's highest_bank_bit from 14 to 13 seems to fix the screen corruption. Note, the MDSS and DPU drivers use HBB=1 (which maps to the highest_bank_bit = 14). So this change merely works around the UBWC swizzling issue on this platform until the real cause is found. Fixes: e7fc9398e608 ("drm/msm/a6xx: Add A610 support") Signed-off-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/573838/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c0bc924cd3025..c9c55e2ea5849 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1287,7 +1287,7 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.highest_bank_bit = 15; if (adreno_is_a610(gpu)) { - gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.highest_bank_bit = 13; gpu->ubwc_config.min_acc_len = 1; gpu->ubwc_config.ubwc_mode = 1; } -- GitLab From 917e9b7c2350e3e53162fcf5035e5f2d68e2cbed Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 9 Jan 2024 10:22:17 -0800 Subject: [PATCH 0580/1405] Revert "drm/msm/gpu: Push gpu lock down past runpm" This reverts commit abe2023b4cea192ab266b351fd38dc9dbd846df0. Changing the locking order means that scheduler/msm_job_run() can race with the recovery kthread worker, with the result that the GPU gets an extra runpm get when we are trying to power it off. Leaving the GPU in an unrecovered state. I'll need to come up with a different scheme for appeasing lockdep. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/573835/ --- drivers/gpu/drm/msm/msm_gpu.c | 11 +++++------ drivers/gpu/drm/msm/msm_ringbuffer.c | 7 +++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 095390774f22b..655002b21b0d5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned long flags; - pm_runtime_get_sync(&gpu->pdev->dev); + WARN_ON(!mutex_is_locked(&gpu->lock)); - mutex_lock(&gpu->lock); + pm_runtime_get_sync(&gpu->pdev->dev); msm_gpu_hw_init(gpu); + submit->seqno = submit->hw_fence->seqno; + update_sw_cntrs(gpu); /* @@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) gpu->funcs->submit(gpu, submit); gpu->cur_ctx_seqno = submit->queue->ctx->seqno; - hangcheck_timer_reset(gpu); - - mutex_unlock(&gpu->lock); - pm_runtime_put(&gpu->pdev->dev); + hangcheck_timer_reset(gpu); } /* diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4bc13f7d005ab..9d6655f96f0ce 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_fence_init(submit->hw_fence, fctx); - submit->seqno = submit->hw_fence->seqno; - mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { @@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) mutex_unlock(&priv->lru.lock); + /* TODO move submit path over to using a per-ring lock.. */ + mutex_lock(&gpu->lock); + msm_gpu_submit(gpu, submit); + mutex_unlock(&gpu->lock); + return dma_fence_get(submit->hw_fence); } -- GitLab From 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:00 +0800 Subject: [PATCH 0581/1405] ext4: refactor ext4_da_map_blocks() Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary parameters and branches, no logic changes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4cae8698f70cf..bbd5ee6dd3f37 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1704,7 +1704,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { if (ext4_es_is_hole(&es)) { - retval = 0; down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1749,26 +1748,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) + goto out_unlock; + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1783,11 +1765,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + goto out_unlock; } +add_delayed: + /* + * XXX: __block_prepare_write() unmaps passed block, + * is it OK? + */ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + if (retval) + goto out_unlock; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + out_unlock: up_read((&EXT4_I(inode)->i_data_sem)); - return retval; } -- GitLab From acf795dc161f3cf481db20f05db4250714e375e5 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:01 +0800 Subject: [PATCH 0582/1405] ext4: convert to exclusive lock while inserting delalloc extents ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem when inserting delalloc extents, it could be raced by another querying path of ext4_map_blocks() without i_rwsem, .e.g buffered read path. Suppose we buffered read a file containing just a hole, and without any cached extents tree, then it is raced by another delayed buffered write to the same area or the near area belongs to the same hole, and the new delalloc extent could be overwritten to a hole extent. pread() pwrite() filemap_read_folio() ext4_mpage_readpages() ext4_map_blocks() down_read(i_data_sem) ext4_ext_determine_hole() //find hole ext4_ext_put_gap_in_cache() ext4_es_find_extent_range() //no delalloc extent ext4_da_map_blocks() down_read(i_data_sem) ext4_insert_delayed_block() //insert delalloc extent ext4_es_insert_extent() //overwrite delalloc extent to hole This race could lead to inconsistent delalloc extents tree and incorrect reserved space counter. Fix this by converting to hold i_data_sem in exclusive mode when adding a new delalloc extent in ext4_da_map_blocks(). Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bbd5ee6dd3f37..b040337501e3e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1703,10 +1703,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1748,8 +1746,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) - goto out_unlock; + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } if (retval > 0) { unsigned int status; @@ -1765,24 +1765,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); - goto out_unlock; + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); add_delayed: - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ + down_write(&EXT4_I(inode)->i_data_sem); retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); if (retval) - goto out_unlock; + return retval; map_bh(bh, inode->i_sb, invalid_block); set_buffer_new(bh); set_buffer_delay(bh); - -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); return retval; } -- GitLab From 6430dea07e85958fa87d0276c0c4388dd51e630b Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:02 +0800 Subject: [PATCH 0583/1405] ext4: correct the hole length returned by ext4_map_blocks() In ext4_map_blocks(), if we can't find a range of mapping in the extents cache, we are calling ext4_ext_map_blocks() to search the real path and ext4_ext_determine_hole() to determine the hole range. But if the querying range was partially or completely overlaped by a delalloc extent, we can't find it in the real extent path, so the returned hole length could be incorrect. Fortunately, ext4_ext_put_gap_in_cache() have already handle delalloc extent, but it searches start from the expanded hole_start, doesn't start from the querying range, so the delalloc extent found could not be the one that overlaped the querying range, plus, it also didn't adjust the hole length. Let's just remove ext4_ext_put_gap_in_cache(), handle delalloc and insert adjusted hole extent in ext4_ext_determine_hole(). Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-4-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 111 +++++++++++++++++++++++++++++----------------- 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9106715254ac0..c5b54fb9eb8bd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2229,7 +2229,7 @@ static int ext4_fill_es_cache_info(struct inode *inode, /* - * ext4_ext_determine_hole - determine hole around given block + * ext4_ext_find_hole - find hole around given block according to the given path * @inode: inode we lookup in * @path: path in extent tree to @lblk * @lblk: pointer to logical block around which we want to determine hole @@ -2241,9 +2241,9 @@ static int ext4_fill_es_cache_info(struct inode *inode, * The function returns the length of a hole starting at @lblk. We update @lblk * to the beginning of the hole if we managed to find it. */ -static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t *lblk) +static ext4_lblk_t ext4_ext_find_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t *lblk) { int depth = ext_depth(inode); struct ext4_extent *ex; @@ -2270,30 +2270,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, return len; } -/* - * ext4_ext_put_gap_in_cache: - * calculate boundaries of the gap that the requested block fits into - * and cache this gap - */ -static void -ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start, - ext4_lblk_t hole_len) -{ - struct extent_status es; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, - hole_start + hole_len - 1, &es); - if (es.es_len) { - /* There's delayed extent containing lblock? */ - if (es.es_lblk <= hole_start) - return; - hole_len = min(es.es_lblk - hole_start, hole_len); - } - ext_debug(inode, " -> %u:%u\n", hole_start, hole_len); - ext4_es_insert_extent(inode, hole_start, hole_len, ~0, - EXTENT_STATUS_HOLE); -} - /* * ext4_ext_rm_idx: * removes index from the index block. @@ -4062,6 +4038,69 @@ static int get_implied_cluster_alloc(struct super_block *sb, return 0; } +/* + * Determine hole length around the given logical block, first try to + * locate and expand the hole from the given @path, and then adjust it + * if it's partially or completely converted to delayed extents, insert + * it into the extent cache tree if it's indeed a hole, finally return + * the length of the determined extent. + */ +static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t lblk) +{ + ext4_lblk_t hole_start, len; + struct extent_status es; + + hole_start = lblk; + len = ext4_ext_find_hole(inode, path, &hole_start); +again: + ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start, + hole_start + len - 1, &es); + if (!es.es_len) + goto insert_hole; + + /* + * There's a delalloc extent in the hole, handle it if the delalloc + * extent is in front of, behind and straddle the queried range. + */ + if (lblk >= es.es_lblk + es.es_len) { + /* + * The delalloc extent is in front of the queried range, + * find again from the queried start block. + */ + len -= lblk - hole_start; + hole_start = lblk; + goto again; + } else if (in_range(lblk, es.es_lblk, es.es_len)) { + /* + * The delalloc extent containing lblk, it must have been + * added after ext4_map_blocks() checked the extent status + * tree, adjust the length to the delalloc extent's after + * lblk. + */ + len = es.es_lblk + es.es_len - lblk; + return len; + } else { + /* + * The delalloc extent is partially or completely behind + * the queried range, update hole length until the + * beginning of the delalloc extent. + */ + len = min(es.es_lblk - hole_start, len); + } + +insert_hole: + /* Put just found gap into cache to speed up subsequent requests */ + ext_debug(inode, " -> %u:%u\n", hole_start, len); + ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + + /* Update hole_len to reflect hole size after lblk */ + if (hole_start != lblk) + len -= lblk - hole_start; + + return len; +} /* * Block allocation/map/preallocation routine for extents based files @@ -4179,22 +4218,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { - ext4_lblk_t hole_start, hole_len; + ext4_lblk_t len; - hole_start = map->m_lblk; - hole_len = ext4_ext_determine_hole(inode, path, &hole_start); - /* - * put just found gap into cache to speed up - * subsequent requests - */ - ext4_ext_put_gap_in_cache(inode, hole_start, hole_len); + len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk); - /* Update hole_len to reflect hole size after map->m_lblk */ - if (hole_start != map->m_lblk) - hole_len -= map->m_lblk - hole_start; map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, hole_len); - + map->m_len = min_t(unsigned int, map->m_len, len); goto out; } -- GitLab From 683cd8259a9b883a51973511f860976db2550a6e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Jan 2024 17:07:23 +0100 Subject: [PATCH 0584/1405] Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID After commit 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") the keyboard on Dell XPS 13 9350 / 9360 / 9370 models has stopped working after a suspend/resume. The problem appears to be that atkbd_probe() fails when called from atkbd_reconnect() on resume, which on systems where ATKBD_CMD_GETID is skipped can only happen by ATKBD_CMD_SETLEDS failing. ATKBD_CMD_SETLEDS failing because ATKBD_CMD_GETID was skipped is weird, but apparently that is what is happening. Fix this by also skipping ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID. Fixes: 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") Reported-by: Paul Menzel Closes: https://lore.kernel.org/linux-input/0aa4a61f-c939-46fe-a572-08022e8931c7@molgen.mpg.de/ Closes: https://bbs.archlinux.org/viewtopic.php?pid=2146300 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218424 Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2260517 Tested-by: Paul Menzel Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240126160724.13278-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 13ef6284223da..c229bd6b3f7f2 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -811,7 +811,6 @@ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; - bool skip_getid; /* * Some systems, where the bit-twiddling when testing the io-lines of the @@ -825,6 +824,11 @@ static int atkbd_probe(struct atkbd *atkbd) "keyboard reset failed on %s\n", ps2dev->serio->phys); + if (atkbd_skip_getid(atkbd)) { + atkbd->id = 0xab83; + return 0; + } + /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or @@ -833,18 +837,17 @@ static int atkbd_probe(struct atkbd *atkbd) */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ - skip_getid = atkbd_skip_getid(atkbd); - if (skip_getid || ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { + if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* - * If the get ID command was skipped or failed, we check if we can at least set + * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; - atkbd->id = skip_getid ? 0xab83 : 0xabba; + atkbd->id = 0xabba; return 0; } -- GitLab From 9cf6e24c9fbf17e52de9fff07f12be7565ea6d61 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 Jan 2024 17:07:24 +0100 Subject: [PATCH 0585/1405] Input: atkbd - do not skip atkbd_deactivate() when skipping ATKBD_CMD_GETID After commit 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") not only the getid command is skipped, but also the de-activating of the keyboard at the end of atkbd_probe(), potentially re-introducing the problem fixed by commit be2d7e4233a4 ("Input: atkbd - fix multi-byte scancode handling on reconnect"). Make sure multi-byte scancode handling on reconnect is still handled correctly by not skipping the atkbd_deactivate() call. Fixes: 936e4d49ecbc ("Input: atkbd - skip ATKBD_CMD_GETID in translated mode") Tested-by: Paul Menzel Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240126160724.13278-3-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c229bd6b3f7f2..7f67f9f2946b4 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -826,7 +826,7 @@ static int atkbd_probe(struct atkbd *atkbd) if (atkbd_skip_getid(atkbd)) { atkbd->id = 0xab83; - return 0; + goto deactivate_kbd; } /* @@ -863,6 +863,7 @@ static int atkbd_probe(struct atkbd *atkbd) return -1; } +deactivate_kbd: /* * Make sure nothing is coming from the keyboard and disturbs our * internal state. -- GitLab From 9f1118223aa080021fe9751fa221590654d27669 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:03 +0800 Subject: [PATCH 0586/1405] ext4: add a hole extent entry in cache after punch In order to cache hole extents in the extent status tree and keep the hole length as long as possible, re-add a hole entry to the cache just after punching a hole. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-5-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b040337501e3e..af205b416794d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4007,12 +4007,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) /* If there are blocks to remove, do it */ if (stop_block > first_block) { + ext4_lblk_t hole_len = stop_block - first_block; down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, first_block, - stop_block - first_block); + ext4_es_remove_extent(inode, first_block, hole_len); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_remove_space(inode, first_block, @@ -4021,6 +4021,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) ret = ext4_ind_remove_space(handle, inode, first_block, stop_block); + ext4_es_insert_extent(inode, first_block, hole_len, ~0, + EXTENT_STATUS_HOLE); up_write(&EXT4_I(inode)->i_data_sem); } ext4_fc_track_range(handle, inode, first_block, stop_block); -- GitLab From 874eaba96d39334fe9c729ff631e65523616d4d8 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:04 +0800 Subject: [PATCH 0587/1405] ext4: make ext4_map_blocks() distinguish delalloc only extent Add a new map flag EXT4_MAP_DELAYED to indicate the mapping range is a delayed allocated only (not unwritten) one, and making ext4_map_blocks() can distinguish it, no longer mixing it with holes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-6-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 +++- fs/ext4/extents.c | 7 +++++-- fs/ext4/inode.c | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 786b6857ab474..023571f8dd1b4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -252,8 +252,10 @@ struct ext4_allocation_request { #define EXT4_MAP_MAPPED BIT(BH_Mapped) #define EXT4_MAP_UNWRITTEN BIT(BH_Unwritten) #define EXT4_MAP_BOUNDARY BIT(BH_Boundary) +#define EXT4_MAP_DELAYED BIT(BH_Delay) #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ - EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY) + EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ + EXT4_MAP_DELAYED) struct ext4_map_blocks { ext4_fsblk_t m_pblk; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c5b54fb9eb8bd..7669d154c05e0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4076,8 +4076,11 @@ static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, /* * The delalloc extent containing lblk, it must have been * added after ext4_map_blocks() checked the extent status - * tree, adjust the length to the delalloc extent's after - * lblk. + * tree so we are not holding i_rwsem and delalloc info is + * only stabilized by i_data_sem we are going to release + * soon. Don't modify the extent status tree and report + * extent as a hole, just adjust the length to the delalloc + * extent's after lblk. */ len = es.es_lblk + es.es_len - lblk; return len; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index af205b416794d..60f0d2dc1c379 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -515,6 +515,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, map->m_len = retval; } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { map->m_pblk = 0; + map->m_flags |= ext4_es_is_delayed(&es) ? + EXT4_MAP_DELAYED : 0; retval = es.es_len - (map->m_lblk - es.es_lblk); if (retval > map->m_len) retval = map->m_len; -- GitLab From ec9d669eba4c276d00af88951947fe0e82a6b84c Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:05 +0800 Subject: [PATCH 0588/1405] ext4: make ext4_set_iomap() recognize IOMAP_DELALLOC map type Since ext4_map_blocks() can recognize a delayed allocated only extent, make ext4_set_iomap() can also recognize it, and remove the useless separate check in ext4_iomap_begin_report(). Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-7-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60f0d2dc1c379..2ccf3b5e3a7c4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3262,6 +3262,9 @@ static void ext4_set_iomap(struct inode *inode, struct iomap *iomap, iomap->addr = (u64) map->m_pblk << blkbits; if (flags & IOMAP_DAX) iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; + } else if (map->m_flags & EXT4_MAP_DELAYED) { + iomap->type = IOMAP_DELALLOC; + iomap->addr = IOMAP_NULL_ADDR; } else { iomap->type = IOMAP_HOLE; iomap->addr = IOMAP_NULL_ADDR; @@ -3424,35 +3427,11 @@ const struct iomap_ops ext4_iomap_overwrite_ops = { .iomap_end = ext4_iomap_end, }; -static bool ext4_iomap_is_delalloc(struct inode *inode, - struct ext4_map_blocks *map) -{ - struct extent_status es; - ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; - - ext4_es_find_extent_range(inode, &ext4_es_is_delayed, - map->m_lblk, end, &es); - - if (!es.es_len || es.es_lblk > end) - return false; - - if (es.es_lblk > map->m_lblk) { - map->m_len = es.es_lblk - map->m_lblk; - return false; - } - - offset = map->m_lblk - es.es_lblk; - map->m_len = es.es_len - offset; - - return true; -} - static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { int ret; - bool delalloc = false; struct ext4_map_blocks map; u8 blkbits = inode->i_blkbits; @@ -3493,13 +3472,8 @@ static int ext4_iomap_begin_report(struct inode *inode, loff_t offset, ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret < 0) return ret; - if (ret == 0) - delalloc = ext4_iomap_is_delalloc(inode, &map); - set_iomap: ext4_set_iomap(inode, iomap, &map, offset, length, flags); - if (delalloc && iomap->type == IOMAP_HOLE) - iomap->type = IOMAP_DELALLOC; return 0; } -- GitLab From b5e69be185495696652405088a27ab0b21812147 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 30 Jan 2024 13:24:40 +1000 Subject: [PATCH 0589/1405] nouveau/gsp: use correct size for registry rpc. Timur pointed this out before, and it just slipped my mind, but this might help some things work better, around pcie power management. Fixes: 8d55b0a940bb ("nouveau/gsp: add some basic registry entries.") Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/576336/ --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9ee58e2a0eb2a..5e1fa176aac4a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1078,7 +1078,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) if (IS_ERR(rpc)) return PTR_ERR(rpc); - rpc->size = sizeof(*rpc); rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); @@ -1094,6 +1093,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) strings += name_len; str_offset += name_len; } + rpc->size = str_offset; return nvkm_gsp_rpc_wr(gsp, rpc, false); } -- GitLab From 39126abc5e20611579602f03b66627d7cd1422f0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 29 Jan 2024 11:26:45 +1000 Subject: [PATCH 0590/1405] nouveau: offload fence uevents work to workqueue This should break the deadlock between the fctx lock and the irq lock. This offloads the processing off the work from the irq into a workqueue. Cc: linux-stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/576237/ --- drivers/gpu/drm/nouveau/nouveau_fence.c | 24 ++++++++++++++++++------ drivers/gpu/drm/nouveau/nouveau_fence.h | 1 + 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ca762ea554136..93f08f9479d89 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { + cancel_work_sync(&fctx->uevent_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc return drop; } -static int -nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) +static void +nouveau_fence_uevent_work(struct work_struct *work) { - struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, + uevent_work); unsigned long flags; - int ret = NVIF_EVENT_KEEP; + int drop = 0; spin_lock_irqsave(&fctx->lock, flags); if (!list_empty(&fctx->pending)) { @@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); if (nouveau_fence_update(chan, fctx)) - ret = NVIF_EVENT_DROP; + drop = 1; } + if (drop) + nvif_event_block(&fctx->event); + spin_unlock_irqrestore(&fctx->lock, flags); +} - return ret; +static int +nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc) +{ + struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event); + schedule_work(&fctx->uevent_work); + return NVIF_EVENT_KEEP; } void @@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; + INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 64d33ae7f3561..8bc065acfe358 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -44,6 +44,7 @@ struct nouveau_fence_chan { u32 context; char name[32]; + struct work_struct uevent_work; struct nvif_event event; int notify_ref, dead, killed; }; -- GitLab From d2d00e15808c37ec476a5c040ee2cdd23854ef18 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Fri, 26 Jan 2024 09:09:18 -0600 Subject: [PATCH 0591/1405] powerpc: iommu: Bring back table group release_ownership() call The commit 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") refactored the code removing the set_platform_dma_ops(). It missed out the table group release_ownership() call which would have got called otherwise during the guest shutdown via vfio_group_detach_container(). On PPC64, this particular call actually sets up the 32-bit TCE table, and enables the 64-bit DMA bypass etc. Now after guest shutdown, the subsequent host driver (e.g megaraid-sas) probe post unbind from vfio-pci fails like, megaraid_sas 0031:01:00.0: Warning: IOMMU dma not supported: mask 0x7fffffffffffffff, table unavailable megaraid_sas 0031:01:00.0: Warning: IOMMU dma not supported: mask 0xffffffff, table unavailable megaraid_sas 0031:01:00.0: Failed to set DMA mask megaraid_sas 0031:01:00.0: Failed from megasas_init_fw 6539 The patch brings back the call to table_group release_ownership() call when switching back to PLATFORM domain from BLOCKED, while also separates the domain_ops for both. Fixes: 2ad56efa80db ("powerpc/iommu: Setup a default domain and remove set_platform_dma_ops") Signed-off-by: Shivaprasad G Bhat Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/170628173462.3742.18330000394415935845.stgit@ltcd48-lp2.aus.stglab.ibm.com Signed-off-by: Joerg Roedel --- arch/powerpc/kernel/iommu.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd4629..d71eac3b2887b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1287,20 +1287,20 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; - int ret = -EINVAL; /* At first attach the ownership is already set */ if (!domain) return 0; - if (!grp) - return -ENODEV; - table_group = iommu_group_get_iommudata(grp); - ret = table_group->ops->take_ownership(table_group); + /* + * The domain being set to PLATFORM from earlier + * BLOCKED. The table_group ownership has to be released. + */ + table_group->ops->release_ownership(table_group); iommu_group_put(grp); - return ret; + return 0; } static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { @@ -1312,13 +1312,32 @@ static struct iommu_domain spapr_tce_platform_domain = { .ops = &spapr_tce_platform_domain_ops, }; -static struct iommu_domain spapr_tce_blocked_domain = { - .type = IOMMU_DOMAIN_BLOCKED, +static int +spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + /* * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain * also sets the dma_api ops */ - .ops = &spapr_tce_platform_domain_ops, + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group); + iommu_group_put(grp); + + return ret; +} + +static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = { + .attach_dev = spapr_tce_blocked_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &spapr_tce_blocked_domain_ops, }; static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) -- GitLab From bb6f4dbe2639d5b8a9fde4bfb6fefecfd3f18df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Thu, 25 Jan 2024 16:32:29 +0100 Subject: [PATCH 0592/1405] selftests/landlock: Fix capability for net_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CAP_NET_ADMIN allows to configure network interfaces, not CAP_SYS_ADMIN which only allows to call unshare(2). Without this change, running network tests as a non-root user but with all capabilities would fail at the setup_loopback() step with "RTNETLINK answers: Operation not permitted". The issue is only visible when running tests with non-root users (i.e. only relying on ambient capabilities). Indeed, when configuring the network interface, the "ip" command is called, which may lead to the special handling of capabilities for the root user by execve(2). If root is the caller, then the inherited, permitted and effective capabilities are all reset, which then includes CAP_NET_ADMIN. However, if a non-root user is the caller, then ambient capabilities are masked by the inherited ones, which were explicitly dropped. To make execution deterministic whatever users are running the tests, set the noroot secure bit for each test, and set the inheritable and ambient capabilities to CAP_NET_ADMIN, the only capability that may be required after an execve(2). Factor out _effective_cap() into _change_cap(), and use it to manage ambient capabilities with the new set_ambient_cap() and clear_ambient_cap() helpers. This makes it possible to run all Landlock tests with check-linux.sh from https://github.com/landlock-lsm/landlock-test-tools Cc: Konstantin Meskhidze Fixes: a549d055a22e ("selftests/landlock: Add network tests") Link: https://lore.kernel.org/r/20240125153230.3817165-2-mic@digikod.net [mic: Make sure SECBIT_NOROOT_LOCKED is set] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/common.h | 48 +++++++++++++++++---- tools/testing/selftests/landlock/net_test.c | 5 ++- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 5b79758cae627..e64bbdf0e86ea 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -115,11 +116,16 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) /* clang-format off */ CAP_DAC_OVERRIDE, CAP_MKNOD, + CAP_NET_ADMIN, + CAP_NET_BIND_SERVICE, CAP_SYS_ADMIN, CAP_SYS_CHROOT, - CAP_NET_BIND_SERVICE, /* clang-format on */ }; + const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED; + + if ((cap_get_secbits() & noroot) != noroot) + EXPECT_EQ(0, cap_set_secbits(noroot)); cap_p = cap_get_proc(); EXPECT_NE(NULL, cap_p) @@ -137,6 +143,8 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); } } + + /* Automatically resets ambient capabilities. */ EXPECT_NE(-1, cap_set_proc(cap_p)) { TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); @@ -145,6 +153,9 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) { TH_LOG("Failed to cap_free: %s", strerror(errno)); } + + /* Quickly checks that ambient capabilities are cleared. */ + EXPECT_NE(-1, cap_get_ambient(caps[0])); } /* We cannot put such helpers in a library because of kselftest_harness.h . */ @@ -158,8 +169,9 @@ static void __maybe_unused drop_caps(struct __test_metadata *const _metadata) _init_caps(_metadata, true); } -static void _effective_cap(struct __test_metadata *const _metadata, - const cap_value_t caps, const cap_flag_value_t value) +static void _change_cap(struct __test_metadata *const _metadata, + const cap_flag_t flag, const cap_value_t cap, + const cap_flag_value_t value) { cap_t cap_p; @@ -168,7 +180,7 @@ static void _effective_cap(struct __test_metadata *const _metadata, { TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); } - EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) + EXPECT_NE(-1, cap_set_flag(cap_p, flag, 1, &cap, value)) { TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); } @@ -183,15 +195,35 @@ static void _effective_cap(struct __test_metadata *const _metadata, } static void __maybe_unused set_cap(struct __test_metadata *const _metadata, - const cap_value_t caps) + const cap_value_t cap) { - _effective_cap(_metadata, caps, CAP_SET); + _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_SET); } static void __maybe_unused clear_cap(struct __test_metadata *const _metadata, - const cap_value_t caps) + const cap_value_t cap) +{ + _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_CLEAR); +} + +static void __maybe_unused +set_ambient_cap(struct __test_metadata *const _metadata, const cap_value_t cap) +{ + _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_SET); + + EXPECT_NE(-1, cap_set_ambient(cap, CAP_SET)) + { + TH_LOG("Failed to set ambient capability %d: %s", cap, + strerror(errno)); + } +} + +static void __maybe_unused clear_ambient_cap( + struct __test_metadata *const _metadata, const cap_value_t cap) { - _effective_cap(_metadata, caps, CAP_CLEAR); + EXPECT_EQ(1, cap_get_ambient(cap)); + _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_CLEAR); + EXPECT_EQ(0, cap_get_ambient(cap)); } /* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */ diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index efcde123af1f2..936cfc879f1d2 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -107,8 +107,11 @@ static void setup_loopback(struct __test_metadata *const _metadata) { set_cap(_metadata, CAP_SYS_ADMIN); ASSERT_EQ(0, unshare(CLONE_NEWNET)); - ASSERT_EQ(0, system("ip link set dev lo up")); clear_cap(_metadata, CAP_SYS_ADMIN); + + set_ambient_cap(_metadata, CAP_NET_ADMIN); + ASSERT_EQ(0, system("ip link set dev lo up")); + clear_ambient_cap(_metadata, CAP_NET_ADMIN); } static bool is_restricted(const struct protocol_variant *const prot, -- GitLab From 46eba193d04f8bd717e525eb4110f3c46c12aec3 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Wed, 31 Jan 2024 10:08:28 +0800 Subject: [PATCH 0593/1405] net: stmmac: xgmac: fix handling of DPP safety error for DMA channels Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") checks and reports safety errors, but leaves the Data Path Parity Errors for each channel in DMA unhandled at all, lead to a storm of interrupt. Fix it by checking and clearing the DMA_DPP_Interrupt_Status register. Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 + .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 721c1f8e892fc..b4f60ab078d67 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -216,6 +216,7 @@ struct stmmac_safety_stats { unsigned long mac_errors[32]; unsigned long mtl_errors[32]; unsigned long dma_errors[32]; + unsigned long dma_dpp_errors[32]; }; /* Number of fields in Safety Stats */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 207ff1799f2c7..5c67a3f89f088 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -303,6 +303,8 @@ #define XGMAC_RXCEIE BIT(4) #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc +#define XGMAC_MTL_DPP_CONTROL 0x000010e0 +#define XGMAC_DDPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 @@ -385,6 +387,7 @@ #define XGMAC_DCEIE BIT(1) #define XGMAC_TCEIE BIT(0) #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074 #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index eb48211d9b0eb..04d7c4dc2e35f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error"; +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error"; +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { + { true, "TDPES0", dpp_tx_err }, + { true, "TDPES1", dpp_tx_err }, + { true, "TDPES2", dpp_tx_err }, + { true, "TDPES3", dpp_tx_err }, + { true, "TDPES4", dpp_tx_err }, + { true, "TDPES5", dpp_tx_err }, + { true, "TDPES6", dpp_tx_err }, + { true, "TDPES7", dpp_tx_err }, + { true, "TDPES8", dpp_tx_err }, + { true, "TDPES9", dpp_tx_err }, + { true, "TDPES10", dpp_tx_err }, + { true, "TDPES11", dpp_tx_err }, + { true, "TDPES12", dpp_tx_err }, + { true, "TDPES13", dpp_tx_err }, + { true, "TDPES14", dpp_tx_err }, + { true, "TDPES15", dpp_tx_err }, + { true, "RDPES0", dpp_rx_err }, + { true, "RDPES1", dpp_rx_err }, + { true, "RDPES2", dpp_rx_err }, + { true, "RDPES3", dpp_rx_err }, + { true, "RDPES4", dpp_rx_err }, + { true, "RDPES5", dpp_rx_err }, + { true, "RDPES6", dpp_rx_err }, + { true, "RDPES7", dpp_rx_err }, + { true, "RDPES8", dpp_rx_err }, + { true, "RDPES9", dpp_rx_err }, + { true, "RDPES10", dpp_rx_err }, + { true, "RDPES11", dpp_rx_err }, + { true, "RDPES12", dpp_rx_err }, + { true, "RDPES13", dpp_rx_err }, + { true, "RDPES14", dpp_rx_err }, + { true, "RDPES15", dpp_rx_err }, +}; + static void dwxgmac3_handle_dma_err(struct net_device *ndev, void __iomem *ioaddr, bool correctable, struct stmmac_safety_stats *stats) @@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev, dwxgmac3_log_error(ndev, value, correctable, "DMA", dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); + + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); + + dwxgmac3_log_error(ndev, value, false, "DMA_DPP", + dwxgmac3_dma_dpp_errors, + STAT_OFF(dma_dpp_errors), stats); } static int @@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + /* 5. Enable Data Path Parity Protection */ + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); + /* already enabled by default, explicit enable it again */ + value &= ~XGMAC_DDPP_DISABLE; + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); + return 0; } @@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, ret |= !corr; } - err = dma & (XGMAC_DEUIS | XGMAC_DECIS); + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path + * Parity Errors here + */ + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); corr = dma & XGMAC_DECIS; if (err) { dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); @@ -930,6 +984,7 @@ static const struct dwxgmac3_error { { dwxgmac3_mac_errors }, { dwxgmac3_mtl_errors }, { dwxgmac3_dma_errors }, + { dwxgmac3_dma_dpp_errors }, }; static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, -- GitLab From ccb88e9549e7cfd8bcd511c538f437e20026e983 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 25 Jan 2024 17:12:53 -0600 Subject: [PATCH 0594/1405] crypto: ccp - Fix null pointer dereference in __sev_platform_shutdown_locked The SEV platform device can be shutdown with a null psp_master, e.g., using DEBUG_TEST_DRIVER_REMOVE. Found using KASAN: [ 137.148210] ccp 0000:23:00.1: enabling device (0000 -> 0002) [ 137.162647] ccp 0000:23:00.1: no command queues available [ 137.170598] ccp 0000:23:00.1: sev enabled [ 137.174645] ccp 0000:23:00.1: psp enabled [ 137.178890] general protection fault, probably for non-canonical address 0xdffffc000000001e: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI [ 137.182693] KASAN: null-ptr-deref in range [0x00000000000000f0-0x00000000000000f7] [ 137.182693] CPU: 93 PID: 1 Comm: swapper/0 Not tainted 6.8.0-rc1+ #311 [ 137.182693] RIP: 0010:__sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] Code: 08 80 3c 08 00 0f 85 0e 01 00 00 48 8b 1d 67 b6 01 08 48 b8 00 00 00 00 00 fc ff df 48 8d bb f0 00 00 00 48 89 f9 48 c1 e9 03 <80> 3c 01 00 0f 85 fe 00 00 00 48 8b 9b f0 00 00 00 48 85 db 74 2c [ 137.182693] RSP: 0018:ffffc900000cf9b0 EFLAGS: 00010216 [ 137.182693] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 000000000000001e [ 137.182693] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 00000000000000f0 [ 137.182693] RBP: ffffc900000cf9c8 R08: 0000000000000000 R09: fffffbfff58f5a66 [ 137.182693] R10: ffffc900000cf9c8 R11: ffffffffac7ad32f R12: ffff8881e5052c28 [ 137.182693] R13: ffff8881e5052c28 R14: ffff8881758e43e8 R15: ffffffffac64abf8 [ 137.182693] FS: 0000000000000000(0000) GS:ffff889de7000000(0000) knlGS:0000000000000000 [ 137.182693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 137.182693] CR2: 0000000000000000 CR3: 0000001cf7c7e000 CR4: 0000000000350ef0 [ 137.182693] Call Trace: [ 137.182693] [ 137.182693] ? show_regs+0x6c/0x80 [ 137.182693] ? __die_body+0x24/0x70 [ 137.182693] ? die_addr+0x4b/0x80 [ 137.182693] ? exc_general_protection+0x126/0x230 [ 137.182693] ? asm_exc_general_protection+0x2b/0x30 [ 137.182693] ? __sev_platform_shutdown_locked+0x51/0x180 [ 137.182693] sev_firmware_shutdown.isra.0+0x1e/0x80 [ 137.182693] sev_dev_destroy+0x49/0x100 [ 137.182693] psp_dev_destroy+0x47/0xb0 [ 137.182693] sp_destroy+0xbb/0x240 [ 137.182693] sp_pci_remove+0x45/0x60 [ 137.182693] pci_device_remove+0xaa/0x1d0 [ 137.182693] device_remove+0xc7/0x170 [ 137.182693] really_probe+0x374/0xbe0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] __driver_probe_device+0x199/0x460 [ 137.182693] driver_probe_device+0x4e/0xd0 [ 137.182693] __driver_attach+0x191/0x3d0 [ 137.182693] ? __pfx___driver_attach+0x10/0x10 [ 137.182693] bus_for_each_dev+0x100/0x190 [ 137.182693] ? __pfx_bus_for_each_dev+0x10/0x10 [ 137.182693] ? __kasan_check_read+0x15/0x20 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? _raw_spin_unlock+0x27/0x50 [ 137.182693] driver_attach+0x41/0x60 [ 137.182693] bus_add_driver+0x2a8/0x580 [ 137.182693] driver_register+0x141/0x480 [ 137.182693] __pci_register_driver+0x1d6/0x2a0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? esrt_sysfs_init+0x1cd/0x5d0 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] sp_pci_init+0x22/0x30 [ 137.182693] sp_mod_init+0x14/0x30 [ 137.182693] ? __pfx_sp_mod_init+0x10/0x10 [ 137.182693] do_one_initcall+0xd1/0x470 [ 137.182693] ? __pfx_do_one_initcall+0x10/0x10 [ 137.182693] ? parameq+0x80/0xf0 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] ? __kmalloc+0x3b0/0x4e0 [ 137.182693] ? kernel_init_freeable+0x92d/0x1050 [ 137.182693] ? kasan_populate_vmalloc_pte+0x171/0x190 [ 137.182693] ? srso_return_thunk+0x5/0x5f [ 137.182693] kernel_init_freeable+0xa64/0x1050 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] kernel_init+0x24/0x160 [ 137.182693] ? __switch_to_asm+0x3e/0x70 [ 137.182693] ret_from_fork+0x40/0x80 [ 137.182693] ? __pfx_kernel_init+0x10/0x10 [ 137.182693] ret_from_fork_asm+0x1b/0x30 [ 137.182693] [ 137.182693] Modules linked in: [ 137.538483] ---[ end trace 0000000000000000 ]--- Fixes: 1b05ece0c931 ("crypto: ccp - During shutdown, check SEV data pointer before using") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Signed-off-by: Kim Phillips Reviewed-by: Liam Merwick Acked-by: John Allen Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e4d3f45242f63..b04bc1d3d627d 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -534,10 +534,16 @@ EXPORT_SYMBOL_GPL(sev_platform_init); static int __sev_platform_shutdown_locked(int *error) { - struct sev_device *sev = psp_master->sev_data; + struct psp_device *psp = psp_master; + struct sev_device *sev; int ret; - if (!sev || sev->state == SEV_STATE_UNINIT) + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); -- GitLab From 69fba378edcaffba7bc7d299fdee02e377069d30 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jan 2024 22:17:11 +0800 Subject: [PATCH 0595/1405] crypto: cbc - Ensure statesize is zero The cbc template should not be applied on stream ciphers, especially ones that have internal state. Enforce this by checking the state size when the instance is created. Reported-by: syzbot+050eeedd6c285d8c42f2@syzkaller.appspotmail.com Fixes: 47309ea13591 ("crypto: arc4 - Add internal state") Signed-off-by: Herbert Xu --- crypto/cbc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/cbc.c b/crypto/cbc.c index eedddef9ce40c..e81918ca68b78 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -148,6 +148,9 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) if (!is_power_of_2(inst->alg.co.base.cra_blocksize)) goto out_free_inst; + if (inst->alg.co.statesize) + goto out_free_inst; + inst->alg.encrypt = crypto_cbc_encrypt; inst->alg.decrypt = crypto_cbc_decrypt; -- GitLab From 24c890dd712f6345e382256cae8c97abb0406b70 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Feb 2024 13:49:09 +0800 Subject: [PATCH 0596/1405] crypto: algif_hash - Remove bogus SGL free on zero-length error path When a zero-length message is hashed by algif_hash, and an error is triggered, it tries to free an SG list that was never allocated in the first place. Fix this by not freeing the SG list on the zero-length error path. Reported-by: Shigeru Yoshida Reported-by: xingwei lee Fixes: b6d972f68983 ("crypto: af_alg/hash: Fix recvmsg() after sendmsg(MSG_MORE)") Cc: Signed-off-by: Herbert Xu Reported-by: syzbot+3266db0c26d1fbbe3abb@syzkaller.appspotmail.com Signed-off-by: Herbert Xu --- crypto/algif_hash.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 82c44d4899b96..e24c829d7a015 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -91,13 +91,13 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, if (!(msg->msg_flags & MSG_MORE)) { err = hash_alloc_result(sk, ctx); if (err) - goto unlock_free; + goto unlock_free_result; ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); err = crypto_wait_req(crypto_ahash_final(&ctx->req), &ctx->wait); if (err) - goto unlock_free; + goto unlock_free_result; } goto done_more; } @@ -170,6 +170,7 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg, unlock_free: af_alg_free_sg(&ctx->sgl); +unlock_free_result: hash_free_result(sk, ctx); ctx->more = false; goto unlock; -- GitLab From 177fbbcb4ed6b306c1626a277fac3fb1c495a4c7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 13:14:13 +0100 Subject: [PATCH 0597/1405] wifi: cfg80211: detect stuck ECSA element in probe resp We recently added some validation that we don't try to connect to an AP that is currently in a channel switch process, since that might want the channel to be quiet or we might not be able to connect in time to hear the switching in a beacon. This was in commit c09c4f31998b ("wifi: mac80211: don't connect to an AP while it's in a CSA process"). However, we promptly got a report that this caused new connection failures, and it turns out that the AP that we now cannot connect to is permanently advertising an extended channel switch announcement, even with quiet. The AP in question was an Asus RT-AC53, with firmware 3.0.0.4.380_10760-g21a5898. As a first step, attempt to detect that we're dealing with such a situation, so mac80211 can use this later. Reported-by: coldolt Closes: https://lore.kernel.org/linux-wireless/CAJvGw+DQhBk_mHXeu6RTOds5iramMW2FbMB01VbKRA4YbHHDTA@mail.gmail.com/ Fixes: c09c4f31998b ("wifi: mac80211: don't connect to an AP while it's in a CSA process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129131413.246972c8775e.Ibf834d7f52f9951a353b6872383da710a7358338@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++ net/wireless/scan.c | 59 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cf79656ce09ca..2b54fdd8ca15a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2910,6 +2910,8 @@ struct cfg80211_bss_ies { * own the beacon_ies, but they're just pointers to the ones from the * @hidden_beacon_bss struct) * @proberesp_ies: the information elements from the last Probe Response frame + * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame, + * cannot rely on it having valid data * @hidden_beacon_bss: in case this BSS struct represents a probe response from * a BSS that hides the SSID in its beacon, this points to the BSS struct * that holds the beacon data. @beacon_ies is still valid, of course, and @@ -2950,6 +2952,8 @@ struct cfg80211_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 proberesp_ecsa_stuck:1; + u8 bssid_index; u8 max_bssid_indicator; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2249b1a89d1c4..389a52c29bfc7 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1731,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known, } } +static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *known, + const struct cfg80211_bss_ies *old) +{ + const struct ieee80211_ext_chansw_ie *ecsa; + const struct element *elem_new, *elem_old; + const struct cfg80211_bss_ies *new, *bcn; + + if (known->pub.proberesp_ecsa_stuck) + return; + + new = rcu_dereference_protected(known->pub.proberesp_ies, + lockdep_is_held(&rdev->bss_lock)); + if (WARN_ON(!new)) + return; + + if (new->tsf - old->tsf < USEC_PER_SEC) + return; + + elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + old->data, old->len); + if (!elem_old) + return; + + elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + new->data, new->len); + if (!elem_new) + return; + + bcn = rcu_dereference_protected(known->pub.beacon_ies, + lockdep_is_held(&rdev->bss_lock)); + if (bcn && + cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + bcn->data, bcn->len)) + return; + + if (elem_new->datalen != elem_old->datalen) + return; + if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie)) + return; + if (memcmp(elem_new->data, elem_old->data, elem_new->datalen)) + return; + + ecsa = (void *)elem_new->data; + + if (!ecsa->mode) + return; + + if (ecsa->new_ch_num != + ieee80211_frequency_to_channel(known->pub.channel->center_freq)) + return; + + known->pub.proberesp_ecsa_stuck = 1; +} + static bool cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *known, @@ -1750,8 +1805,10 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, /* Override possible earlier Beacon frame IEs */ rcu_assign_pointer(known->pub.ies, new->pub.proberesp_ies); - if (old) + if (old) { + cfg80211_check_stuck_ecsa(rdev, known, old); kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + } } if (rcu_access_pointer(new->pub.beacon_ies)) { -- GitLab From 35e2385dbe787936c793d70755a5177d267a40aa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 13:14:14 +0100 Subject: [PATCH 0598/1405] wifi: mac80211: improve CSA/ECSA connection refusal As mentioned in the previous commit, we pretty quickly found that some APs have ECSA elements stuck in their probe response, so using that to not attempt to connect while CSA is happening we never connect to such an AP. Improve this situation by checking more carefully and ignoring the ECSA if cfg80211 has previously detected the ECSA element being stuck in the probe response. Additionally, allow connecting to an AP that's switching to a channel it's already using, unless it's using quiet mode. In this case, we may just have to adjust bandwidth later. If it's actually switching channels, it's better not to try to connect in the middle of that. Reported-by: coldolt Closes: https://lore.kernel.org/linux-wireless/CAJvGw+DQhBk_mHXeu6RTOds5iramMW2FbMB01VbKRA4YbHHDTA@mail.gmail.com/ Fixes: c09c4f31998b ("wifi: mac80211: don't connect to an AP while it's in a CSA process") Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240129131413.cc2d0a26226e.I682c016af76e35b6c47007db50e8554c5a426910@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 103 ++++++++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 27 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 073105deb4248..c62c7c6ce91fe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7309,6 +7309,75 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return err; } +static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_bss_ies *ies, + u8 cur_channel, bool ignore_ecsa) +{ + const struct element *csa_elem, *ecsa_elem; + struct ieee80211_channel_sw_ie *csa = NULL; + struct ieee80211_ext_chansw_ie *ecsa = NULL; + + if (!ies) + return false; + + csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH, + ies->data, ies->len); + if (csa_elem && csa_elem->datalen == sizeof(*csa)) + csa = (void *)csa_elem->data; + + ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + ies->data, ies->len); + if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa)) + ecsa = (void *)ecsa_elem->data; + + if (csa && csa->count == 0) + csa = NULL; + if (csa && !csa->mode && csa->new_ch_num == cur_channel) + csa = NULL; + + if (ecsa && ecsa->count == 0) + ecsa = NULL; + if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel) + ecsa = NULL; + + if (ignore_ecsa && ecsa) { + sdata_info(sdata, + "Ignoring ECSA in probe response - was considered stuck!\n"); + return csa; + } + + return csa || ecsa; +} + +static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *bss) +{ + u8 cur_channel; + bool ret; + + cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq); + + rcu_read_lock(); + if (ieee80211_mgd_csa_present(sdata, + rcu_dereference(bss->beacon_ies), + cur_channel, false)) { + ret = true; + goto out; + } + + if (ieee80211_mgd_csa_present(sdata, + rcu_dereference(bss->proberesp_ies), + cur_channel, bss->proberesp_ecsa_stuck)) { + ret = true; + goto out; + } + + ret = false; +out: + rcu_read_unlock(); + return ret; +} + /* config hooks */ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct cfg80211_auth_request *req) @@ -7317,7 +7386,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_link_data *link; - const struct element *csa_elem, *ecsa_elem; u16 auth_alg; int err; bool cont_auth; @@ -7360,21 +7428,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data) return -EBUSY; - rcu_read_lock(); - csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH); - ecsa_elem = ieee80211_bss_get_elem(req->bss, - WLAN_EID_EXT_CHANSWITCH_ANN); - if ((csa_elem && - csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) && - ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) || - (ecsa_elem && - ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) && - ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) { - rcu_read_unlock(); + if (ieee80211_mgd_csa_in_process(sdata, req->bss)) { sdata_info(sdata, "AP is in CSA process, reject auth\n"); return -EINVAL; } - rcu_read_unlock(); auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); @@ -7684,7 +7741,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data; - const struct element *ssid_elem, *csa_elem, *ecsa_elem; + const struct element *ssid_elem; struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg; ieee80211_conn_flags_t conn_flags = 0; struct ieee80211_link_data *link; @@ -7707,23 +7764,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss; - rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); - if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { - rcu_read_unlock(); + if (ieee80211_mgd_csa_in_process(sdata, cbss)) { + sdata_info(sdata, "AP is in CSA process, reject assoc\n"); kfree(assoc_data); return -EINVAL; } - csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH); - ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN); - if ((csa_elem && - csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) && - ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) || - (ecsa_elem && - ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) && - ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) { - sdata_info(sdata, "AP is in CSA process, reject assoc\n"); + rcu_read_lock(); + ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); + if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; -- GitLab From 9480adfe4e0f0319b9da04b44e4eebd5ad07e0cd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:53:48 +0100 Subject: [PATCH 0599/1405] wifi: mac80211: fix RCU use in TDLS fast-xmit This looks up the link under RCU protection, but isn't guaranteed to actually have protection. Fix that. Fixes: 8cc07265b691 ("wifi: mac80211: handle TDLS data frames with MLO") Link: https://msgid.link/20240129155348.8a9c0b1e1d89.I553f96ce953bb41b0b877d592056164dec20d01c@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 68a48abc72876..e448ab3384489 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3100,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* DA SA BSSID */ build.da_offs = offsetof(struct ieee80211_hdr, addr1); build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + rcu_read_lock(); link = rcu_dereference(sdata->link[tdls_link_id]); - if (WARN_ON_ONCE(!link)) - break; - memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); + if (!WARN_ON_ONCE(!link)) + memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); + rcu_read_unlock(); build.hdr_len = 24; break; } -- GitLab From dd6c064cfc3fc18d871107c6f5db8837e88572e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:53:55 +0100 Subject: [PATCH 0600/1405] wifi: mac80211: set station RX-NSS on reconfig When a station is added/reconfigured by userspace, e.g. a TDLS peer or a SoftAP client STA, rx_nss is currently not always set, so that it might be left zero. Set it up properly. Link: https://msgid.link/20240129155354.98f148a3d654.I193a02155f557ea54dc9d0232da66cf96734119a@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 489dd97f51724..321698012e122 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1869,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } -- GitLab From 733c498a80853acbafe284a40468b91f4d41f0b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 15:54:02 +0100 Subject: [PATCH 0601/1405] wifi: mac80211: fix driver debugfs for vif type change If a driver implements the change_interface() method, we switch interface type without taking the interface down, but still will recreate the debugfs for it since it's a new type. As such, we should use the ieee80211_debugfs_recreate_netdev() function here to also recreate the driver's files, if it is indeed from a type change while up. Link: https://msgid.link/20240129155402.7311a36ffeeb.I18df02bbeb685d4250911de5ffbaf090f60c3803@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs_netdev.c | 4 ++-- net/mac80211/debugfs_netdev.h | 5 ----- net/mac80211/iface.c | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index dce5606ed66da..68596ef78b15e 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link, } } -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, - bool mld_vif) +static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif) { char buf[10+IFNAMSIZ]; diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index b226b1aae88a5..a02ec0a413f61 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -11,8 +11,6 @@ #include "ieee80211_i.h" #ifdef CONFIG_MAC80211_DEBUGFS -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, - bool mld_vif); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, @@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else -static inline void ieee80211_debugfs_add_netdev( - struct ieee80211_sub_if_data *sdata, bool mld_vif) -{} static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) {} diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e4e7c0b38cb6e..11c4caa4748e4 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1783,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* need to do this after the switch so vif.type is correct */ ieee80211_link_setup(&sdata->deflink); - ieee80211_debugfs_add_netdev(sdata, false); + ieee80211_debugfs_recreate_netdev(sdata, false); } static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, -- GitLab From 86b2dac224f963be92634a878888222e1e938f48 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:54:05 +0100 Subject: [PATCH 0602/1405] wifi: mac80211: initialize SMPS mode correctly The SMPS mode is currently re-initialized too late, since ieee80211_prep_channel() can be called again after we've already done ieee80211_setup_assoc_link(), in case there's some override of the channel configuration. Fix this. Link: https://msgid.link/20240129195405.d6d74508be18.I0a7303b1ce4d8e5436011951ab624372a445c069@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c62c7c6ce91fe..f1cdd2df323e7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #include @@ -2918,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* other links will be destroyed */ sdata->deflink.u.mgd.bss = NULL; + sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; netif_carrier_off(sdata->dev); @@ -5045,9 +5046,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (!link) return 0; - /* will change later if needed */ - link->smps_mode = IEEE80211_SMPS_OFF; - /* * If this fails (possibly due to channel context sharing * on incompatible channels, e.g. 80+80 and 160 sharing the @@ -7096,6 +7094,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) link->u.mgd.p2p_noa_index = -1; link->u.mgd.conn_flags = 0; link->conf->bssid = link->u.mgd.bssid; + link->smps_mode = IEEE80211_SMPS_OFF; wiphy_work_init(&link->u.mgd.request_smps_work, ieee80211_request_smps_mgd_work); -- GitLab From 178e9d6adc4356c2f1659f575ecea626e7fbd05a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:57:30 +0100 Subject: [PATCH 0603/1405] wifi: mac80211: fix unsolicited broadcast probe config There's a bug in ieee80211_set_unsol_bcast_probe_resp(), it tries to return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP (which has the value 1<<31) in an int, which makes it negative and considered an error. Fix this by passing the changed flags to set separately. Fixes: 3b1c256eb4ae ("wifi: mac80211: fixes in FILS discovery updates") Reviewed-by: Jeff Johnson Link: https://msgid.link/20240129195729.965b0740bf80.I6bc6f5236863f686c17d689be541b1dd2633c417@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 321698012e122..327682995c926 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -987,7 +987,8 @@ static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, - struct ieee80211_bss_conf *link_conf) + struct ieee80211_bss_conf *link_conf, + u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; @@ -1011,7 +1012,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); } - return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; + *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; + return 0; } static int ieee80211_set_ftm_responder_params( @@ -1450,10 +1452,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, - link, link_conf); + link, link_conf, &changed); if (err < 0) goto error; - changed |= err; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { @@ -1525,10 +1526,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, err = ieee80211_set_unsol_bcast_probe_resp(sdata, ¶ms->unsol_bcast_probe_resp, - link, link_conf); + link, link_conf, &changed); if (err < 0) return err; - changed |= err; if (beacon->he_bss_color_valid && beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { -- GitLab From a0b4f2291319c5d47ecb196b90400814fdcfd126 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:56 +0100 Subject: [PATCH 0604/1405] wifi: mac80211: fix waiting for beacons logic This should be waiting if we don't have a beacon yet, but somehow I managed to invert the logic. Fix that. Fixes: 74e1309acedc ("wifi: mac80211: mlme: look up beacon elems only if needed") Link: https://msgid.link/20240131164856.922701229546.I239b379e7cee04608e73c016b737a5245e5b23dd@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f1cdd2df323e7..12f67871af698 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8046,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - - if (beacon_ies) { + if (!beacon_ies) { /* * Wait up to one beacon interval ... * should this be more if we miss one? -- GitLab From c042600c17d8c490279f0ae2baee29475fe8047d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:48:23 +0100 Subject: [PATCH 0605/1405] wifi: mac80211: adding missing drv_mgd_complete_tx() call There's a call to drv_mgd_prepare_tx() and so there should be one to drv_mgd_complete_tx(), but on this path it's not. Add it. Link: https://msgid.link/20240131164824.2f0922a514e1.I5aac89b93bcead88c374187d70cad0599d29d2c8@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 12f67871af698..2022a26eb8811 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8127,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } -- GitLab From 62a6183c13319e4d2227473a04abd104c4f56dcf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 20:09:07 +0100 Subject: [PATCH 0606/1405] wifi: mac80211: accept broadcast probe responses on 6 GHz On the 6 GHz band, probe responses are sent as broadcast to optimise medium usage. However, without OCE configuration we weren't accepting them, which is wrong, even if wpa_s is by default enabling OCE. Accept them without the OCE config as well. Link: https://msgid.link/20240129200907.5a89c2821897.I92e9dfa0f9b350bc7f37dd4bb38031d156d78d8a@changeid Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 645355e5f1bc7..f9d5842601fa9 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -237,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, u32 scan_flags, const u8 *da) { if (!sdata) return false; - /* accept broadcast for OCE */ - if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && - is_broadcast_ether_addr(da)) + + /* accept broadcast on 6 GHz and for OCE */ + if (is_broadcast_ether_addr(da) && + (channel->band == NL80211_BAND_6GHZ || + scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP)) return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) return true; return ether_addr_equal(da, sdata->vif.addr); @@ -293,6 +297,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } + channel = ieee80211_get_channel_khz(local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -310,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) /* ignore ProbeResp to foreign address or non-bcast (OCE) * unless scanning with randomised address */ - if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + if (!ieee80211_scan_accept_presp(sdata1, channel, + scan_req_flags, mgmt->da) && - !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + !ieee80211_scan_accept_presp(sdata2, channel, + sched_scan_req_flags, mgmt->da)) return; } - channel = ieee80211_get_channel_khz(local->hw.wiphy, - ieee80211_rx_status_to_khz(rx_status)); - - if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) - return; - bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, channel); -- GitLab From fe92f874f09145a6951deacaa4961390238bbe0d Mon Sep 17 00:00:00 2001 From: Michael Lass Date: Wed, 31 Jan 2024 16:52:20 +0100 Subject: [PATCH 0607/1405] net: Fix from address in memcpy_to_iter_csum() While inlining csum_and_memcpy() into memcpy_to_iter_csum(), the from address passed to csum_partial_copy_nocheck() was accidentally changed. This causes a regression in applications using UDP, as for example OpenAFS, causing loss of datagrams. Fixes: dc32bff195b4 ("iov_iter, net: Fold in csum_and_memcpy()") Cc: David Howells Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Signed-off-by: Michael Lass Reviewed-by: Jeffrey Altman Acked-by: David Howells Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 103d46fa0eeb3..a8b625abe242c 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress, size_t len, void *from, void *priv2) { __wsum *csum = priv2; - __wsum next = csum_partial_copy_nocheck(from, iter_to, len); + __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len); *csum = csum_block_add(*csum, next, progress); return 0; -- GitLab From 1fa942f31665ea5dc5d4d95893dd13723eaa97cc Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 28 Jan 2024 08:53:57 +0200 Subject: [PATCH 0608/1405] wifi: iwlwifi: mvm: fix a battery life regression Fix the DBG_CONFIG_TOKEN to not enable debug components that would prevent the device to save power. Fixes: fc2fe0a5e856 ("wifi: iwlwifi: fw: disable firmware debug asserts") Cc: stable@vger.kernel.org Signed-off-by: Emmanuel Grumbach Reviewed-by: Eilon Rinat Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240128084842.90d2600edc27.Id657ea2f0ddb131f5f9d0ac39aeb8c88754fe54b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/api/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h index 798731ecbefde..b740c65a7dca2 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h @@ -537,7 +537,7 @@ enum iwl_fw_dbg_config_cmd_type { }; /* LDBG_CFG_CMD_TYPE_API_E_VER_1 */ /* this token disables debug asserts in the firmware */ -#define IWL_FW_DBG_CONFIG_TOKEN 0x00011301 +#define IWL_FW_DBG_CONFIG_TOKEN 0x00010001 /** * struct iwl_fw_dbg_config_cmd - configure FW debug -- GitLab From 16867c38bcd3be2eb9016a3198a096f93959086e Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Thu, 1 Feb 2024 16:17:39 +0200 Subject: [PATCH 0609/1405] wifi: iwlwifi: exit eSR only after the FW does Currently the driver exits eSR by calling iwl_mvm_esr_mode_inactive() before updating the FW (by deactivating one of the links), and therefore before sending the EML frame notifying that we are no longer in eSR. This is wrong for several reasons: 1. The driver sends SMPS activation frames when we are still in eSR and SMPS should be disabled when in eSR 2. The driver restores RLC configuration as it was before eSR entering, and RLC command shouldn't be sent in eSR Fix this by calling iwl_mvm_esr_mode_inactive() after FW update Fixes: 12bacfc2c065 ("wifi: iwlwifi: handle eSR transitions") Signed-off-by: Miri Korenblit Reviewed-by: Ilan Peer Reviewed-by: Gregory Greenman Link: https://msgid.link/20240201155157.d8d9dc277d4e.Ib5aee0fd05e35b1da7f18753eb3c8fa0a3f872f3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index 1f36e934ef69e..893b69fc841b8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -438,6 +438,9 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm, mvmvif->ap_ibss_active = false; } + iwl_mvm_link_changed(mvm, vif, link_conf, + LINK_CONTEXT_MODIFY_ACTIVE, false); + if (iwl_mvm_is_esr_supported(mvm->fwrt.trans) && n_active > 1) { int ret = iwl_mvm_esr_mode_inactive(mvm, vif); @@ -449,9 +452,6 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm, if (vif->type == NL80211_IFTYPE_MONITOR) iwl_mvm_mld_rm_snif_sta(mvm, vif); - iwl_mvm_link_changed(mvm, vif, link_conf, - LINK_CONTEXT_MODIFY_ACTIVE, false); - if (switching_chanctx) return; mvmvif->link[link_id]->phy_ctxt = NULL; -- GitLab From a23c0af103e184bb1252dddddda040f6641bea7b Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 1 Feb 2024 16:17:30 +0200 Subject: [PATCH 0610/1405] wifi: iwlwifi: do not announce EPCS support mac80211 does not have proper support for EPCS currently as that would require changing the ECDA parameters if EPCS (Emergency Preparedness Communications Service) is in use. As such, do not announce support for it in the capabilities. Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240201155157.59d71656addc.Idde91b3018239c49fc6ed231b411d05354fb9fb1@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 4028969886869..2f6774ec37b22 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -668,7 +668,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 | @@ -793,7 +792,6 @@ static const struct ieee80211_sband_iftype_data iwl_he_eht_capa[] = { .has_eht = true, .eht_cap_elem = { .mac_cap_info[0] = - IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | IEEE80211_EHT_MAC_CAP0_OM_CONTROL | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2, @@ -1020,8 +1018,7 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans, if (CSR_HW_REV_TYPE(trans->hw_rev) == IWL_CFG_MAC_TYPE_GL && iftype_data->eht_cap.has_eht) { iftype_data->eht_cap.eht_cap_elem.mac_cap_info[0] &= - ~(IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS | - IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | + ~(IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 | IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2); iftype_data->eht_cap.eht_cap_elem.phy_cap_info[3] &= ~(IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO | -- GitLab From 7fddac12c38237252431d5b8af7b6d5771b6d125 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:33 -0800 Subject: [PATCH 0611/1405] driver core: Fix device_link_flag_is_sync_state_only() device_link_flag_is_sync_state_only() correctly returns true on the flags of an existing device link that only implements sync_state() functionality. However, it incorrectly and confusingly returns false if it's called with DL_FLAG_SYNC_STATE_ONLY. This bug doesn't manifest in any of the existing calls to this function, but fix this confusing behavior to avoid future bugs. Fixes: 67cad5c67019 ("driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 14d46af40f9a1..52215c4c72096 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -284,10 +284,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** -- GitLab From 6442d79d880cf7a2fff18779265d657fef0cce4c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:34 -0800 Subject: [PATCH 0612/1405] driver core: fw_devlink: Improve detection of overlapping cycles fw_devlink can detect most overlapping/intersecting cycles. However it was missing a few corner cases because of an incorrect optimization logic that tries to avoid repeating cycle detection for devices that are already marked as part of a cycle. Here's an example provided by Xu Yang (edited for clarity): usb +-----+ tcpc | | +-----+ | +--| | |----------->|EP| |--+ | | +--| |EP|<-----------| | |--+ | | B | | | +-----+ | A | | +-----+ | ^ +-----+ | | | | | +-----| C |<--+ | | +-----+ usb-phy Node A (tcpc) will be populated as device 1-0050. Node B (usb) will be populated as device 38100000.usb. Node C (usb-phy) will be populated as device 381f0040.usb-phy. The description below uses the notation: consumer --> supplier child ==> parent 1. Node C is populated as device C. No cycles detected because cycle detection is only run when a fwnode link is converted to a device link. 2. Node B is populated as device B. As we convert B --> C into a device link we run cycle detection and find and mark the device link/fwnode link cycle: C--> A --> B.EP ==> B --> C 3. Node A is populated as device A. As we convert C --> A into a device link, we see it's already part of a cycle (from step 2) and don't run cycle detection. Thus we miss detecting the cycle: A --> B.EP ==> B --> A.EP ==> A Looking at it another way, A depends on B in one way: A --> B.EP ==> B But B depends on A in two ways and we only detect the first: B --> C --> A B --> A.EP ==> A To detect both of these, we remove the incorrect optimization attempt in step 3 and run cycle detection even if the fwnode link from which the device link is being created has already been marked as part of a cycle. Reported-by: Xu Yang Closes: https://lore.kernel.org/lkml/DU2PR04MB8822693748725F85DC0CB86C8C792@DU2PR04MB8822.eurprd04.prod.outlook.com/ Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust") Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 52215c4c72096..e3d666461835b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2060,9 +2060,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); -- GitLab From 6e7ad1aebb4fc9fed0217dd50ef6e58a53f17d81 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 2 Feb 2024 01:56:35 -0800 Subject: [PATCH 0613/1405] driver core: fw_devlink: Improve logs for cycle detection The links in a cycle are not all logged in a consistent manner or not logged at all. Make them consistent by adding a "cycle:" string and log all the link in the cycles (even the child ==> parent dependency) so that it's easier to debug cycle detection code. Also, mark the start and end of a cycle so it's easy to tell when multiple cycles are logged back to back. Signed-off-by: Saravana Kannan Tested-by: Xu Yang Link: https://lore.kernel.org/r/20240202095636.868578-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index e3d666461835b..9828da9b933cb 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -125,7 +125,7 @@ static void __fwnode_link_del(struct fwnode_link *link) */ static void __fwnode_link_cycle(struct fwnode_link *link) { - pr_debug("%pfwf: Relaxing link with %pfwf\n", + pr_debug("%pfwf: cycle: depends on %pfwf\n", link->consumer, link->supplier); link->flags |= FWLINK_FLAG_CYCLE; } @@ -1945,6 +1945,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, /* Termination condition. */ if (sup_dev == con) { + pr_debug("----- cycle: start -----\n"); ret = true; goto out; } @@ -1976,8 +1977,11 @@ static bool __fw_devlink_relax_cycles(struct device *con, else par_dev = fwnode_get_next_parent_dev(sup_handle); - if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) + if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) { + pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, + par_dev->fwnode); ret = true; + } if (!sup_dev) goto out; @@ -1993,6 +1997,8 @@ static bool __fw_devlink_relax_cycles(struct device *con, if (__fw_devlink_relax_cycles(con, dev_link->supplier->fwnode)) { + pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, + dev_link->supplier->fwnode); fw_devlink_relax_link(dev_link); dev_link->flags |= DL_FLAG_CYCLE; ret = true; @@ -2072,6 +2078,7 @@ static int fw_devlink_create_devlink(struct device *con, if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); flags = fw_devlink_get_flags(link->flags); + pr_debug("----- cycle: end -----\n"); dev_info(con, "Fixed dependency cycle(s) with %pfwf\n", sup_handle); } -- GitLab From bd6081be2251c3700eca8a7bbe071e1bb8cd2af4 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 30 Jan 2024 22:02:16 +0530 Subject: [PATCH 0614/1405] dmaengine: at_hdmac: add missing kernel-doc style description We get following warning with W=1: drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'boundary' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'dst_hole' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'src_hole' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_buffer' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_paddr' not described in 'at_desc' drivers/dma/at_hdmac.c:243: warning: Function parameter or struct member 'memset_vaddr' not described in 'at_desc' drivers/dma/at_hdmac.c:255: warning: Enum value 'ATC_IS_PAUSED' not described in enum 'atc_status' drivers/dma/at_hdmac.c:255: warning: Enum value 'ATC_IS_CYCLIC' not described in enum 'atc_status' drivers/dma/at_hdmac.c:287: warning: Function parameter or struct member 'cyclic' not described in 'at_dma_chan' drivers/dma/at_hdmac.c:350: warning: Function parameter or struct member 'memset_pool' not described in 'at_dma' Fix this by adding the required description and also drop unused struct member 'cyclic' in 'at_dma_chan' Reviewed-by: Tudor Ambarus Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20240130163216.633034-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/at_hdmac.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 6bad536e04924..40052d1bd0b5c 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -224,6 +224,12 @@ struct atdma_sg { * @total_len: total transaction byte count * @sglen: number of sg entries. * @sg: array of sgs. + * @boundary: number of transfers to perform before the automatic address increment operation + * @dst_hole: value to add to the destination address when the boundary has been reached + * @src_hole: value to add to the source address when the boundary has been reached + * @memset_buffer: buffer used for the memset operation + * @memset_paddr: physical address of the buffer used for the memset operation + * @memset_vaddr: virtual address of the buffer used for the memset operation */ struct at_desc { struct virt_dma_desc vd; @@ -247,6 +253,9 @@ struct at_desc { /** * enum atc_status - information bits stored in channel status flag * + * @ATC_IS_PAUSED: If channel is pauses + * @ATC_IS_CYCLIC: If channel is cyclic + * * Manipulated with atomic operations. */ enum atc_status { @@ -282,7 +291,6 @@ struct at_dma_chan { u32 save_cfg; u32 save_dscr; struct dma_slave_config dma_sconfig; - bool cyclic; struct at_desc *desc; }; @@ -333,6 +341,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width) * @save_imr: interrupt mask register that is saved on suspend/resume cycle * @all_chan_mask: all channels availlable in a mask * @lli_pool: hw lli table + * @memset_pool: hw memset pool * @chan: channels table to store at_dma_chan structures */ struct at_dma { -- GitLab From e9f1e6bb55bea4f8cb48f8f7443bbac99b60d285 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 2 Feb 2024 17:11:25 +0100 Subject: [PATCH 0615/1405] Revert "gfs2: Use GL_NOBLOCK flag for non-blocking lookups" Commit "gfs2: Use GL_NOBLOCK flag for non-blocking lookups" has several issues, some of which are non-trivial to fix, so revert it for now: https://lore.kernel.org/gfs2/20240202050230.GA875515@ZenIV/T/ This reverts commit dd00aaeb343255a8a30de671bd27bde79a47c8e5. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/dentry.c | 23 +++++++++-------------- fs/gfs2/inode.c | 8 ++++---- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 177f1f41f2254..2e215e8c3c88e 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -32,25 +32,21 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent = NULL; + struct dentry *parent; struct gfs2_sbd *sdp; struct gfs2_inode *dip; - struct inode *dinode, *inode; + struct inode *inode; struct gfs2_holder d_gh; struct gfs2_inode *ip = NULL; int error, valid = 0; int had_lock = 0; - if (flags & LOOKUP_RCU) { - dinode = d_inode_rcu(READ_ONCE(dentry->d_parent)); - if (!dinode) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dinode = d_inode(parent); - } - sdp = GFS2_SB(dinode); - dip = GFS2_I(dinode); + if (flags & LOOKUP_RCU) + return -ECHILD; + + parent = dget_parent(dentry); + sdp = GFS2_SB(d_inode(parent)); + dip = GFS2_I(d_inode(parent)); inode = d_inode(dentry); if (inode) { @@ -66,8 +62,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); if (!had_lock) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, - flags & LOOKUP_RCU ? GL_NOBLOCK : 0, &d_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); if (error) goto out; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6bfc9383b7b8e..1b95db2c3aac3 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1882,10 +1882,10 @@ int gfs2_permission(struct mnt_idmap *idmap, struct inode *inode, WARN_ON_ONCE(!may_not_block); return -ECHILD; } - if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - int noblock = may_not_block ? GL_NOBLOCK : 0; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_ANY | noblock, &i_gh); + if (gfs2_glock_is_locked_by_me(gl) == NULL) { + if (may_not_block) + return -ECHILD; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; } -- GitLab From 5bdda0048c8d1bbe2019513b2d6200cc0d09c7bd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 26 Jan 2024 14:31:53 -0800 Subject: [PATCH 0616/1405] wifi: brcmfmac: Adjust n_channels usage for __counted_by After commit e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by"), the compiler may enforce dynamic array indexing of req->channels to stay below n_channels. As a result, n_channels needs to be increased _before_ accessing the newly added array index. Increment it first, then use "i" for the prior index. Solves this warning in the coming GCC that has __counted_by support: ../drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c: In function 'brcmf_internal_escan_add_info': ../drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:3783:46: warning: operation on 'req-> n_channels' may be undefined [-Wsequence-point] 3783 | req->channels[req->n_channels++] = chan; | ~~~~~~~~~~~~~~~^~ Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Kalle Valo Cc: Chi-hsien Lin Cc: Ian Lin Cc: Johannes Berg Cc: Wright Feng Cc: Hector Martin Cc: linux-wireless@vger.kernel.org Cc: brcm80211-dev-list.pdl@broadcom.com Signed-off-by: Kees Cook Reviewed-by: Hans de Goede Reviewed-by: Linus Walleij Reviewed-by: Gustavo A. R. Silva Signed-off-by: Kalle Valo Link: https://msgid.link/20240126223150.work.548-kees@kernel.org --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 133c5ea6429cd..28d6a30cc0106 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3779,8 +3779,10 @@ static int brcmf_internal_escan_add_info(struct cfg80211_scan_request *req, if (req->channels[i] == chan) break; } - if (i == req->n_channels) - req->channels[req->n_channels++] = chan; + if (i == req->n_channels) { + req->n_channels++; + req->channels[i] = chan; + } for (i = 0; i < req->n_ssids; i++) { if (req->ssids[i].ssid_len == ssid_len && -- GitLab From 0a9bab391e336489169b95cb0d4553d921302189 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 31 Jan 2024 21:57:27 +0100 Subject: [PATCH 0617/1405] dm-crypt, dm-verity: disable tasklets Tasklets have an inherent problem with memory corruption. The function tasklet_action_common calls tasklet_trylock, then it calls the tasklet callback and then it calls tasklet_unlock. If the tasklet callback frees the structure that contains the tasklet or if it calls some code that may free it, tasklet_unlock will write into free memory. The commits 8e14f610159d and d9a02e016aaf try to fix it for dm-crypt, but it is not a sufficient fix and the data corruption can still happen [1]. There is no fix for dm-verity and dm-verity will write into free memory with every tasklet-processed bio. There will be atomic workqueues implemented in the kernel 6.9 [2]. They will have better interface and they will not suffer from the memory corruption problem. But we need something that stops the memory corruption now and that can be backported to the stable kernels. So, I'm proposing this commit that disables tasklets in both dm-crypt and dm-verity. This commit doesn't remove the tasklet support, because the tasklet code will be reused when atomic workqueues will be implemented. [1] https://lore.kernel.org/all/d390d7ee-f142-44d3-822a-87949e14608b@suse.de/T/ [2] https://lore.kernel.org/lkml/20240130091300.2968534-1-tj@kernel.org/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Fixes: 39d42fa96ba1b ("dm crypt: add flags to optionally bypass kcryptd workqueues") Fixes: 5721d4e5a9cdb ("dm verity: Add optional "try_verify_in_tasklet" feature") Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 38 ++--------------------------------- drivers/md/dm-verity-target.c | 26 ++---------------------- drivers/md/dm-verity.h | 1 - 3 files changed, 4 insertions(+), 61 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 855b482cbff1f..f745f85082434 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -73,10 +73,8 @@ struct dm_crypt_io { struct bio *base_bio; u8 *integrity_metadata; bool integrity_metadata_from_pool:1; - bool in_tasklet:1; struct work_struct work; - struct tasklet_struct tasklet; struct convert_context ctx; @@ -1762,7 +1760,6 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; - io->in_tasklet = false; atomic_set(&io->io_pending, 0); } @@ -1771,13 +1768,6 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } -static void kcryptd_io_bio_endio(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - - bio_endio(io->base_bio); -} - /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1801,20 +1791,6 @@ static void crypt_dec_pending(struct dm_crypt_io *io) base_bio->bi_status = error; - /* - * If we are running this function from our tasklet, - * we can't call bio_endio() here, because it will call - * clone_endio() from dm.c, which in turn will - * free the current struct dm_crypt_io structure with - * our tasklet. In this case we need to delay bio_endio() - * execution to after the tasklet is done and dequeued. - */ - if (io->in_tasklet) { - INIT_WORK(&io->work, kcryptd_io_bio_endio); - queue_work(cc->io_queue, &io->work); - return; - } - bio_endio(base_bio); } @@ -2246,11 +2222,6 @@ static void kcryptd_crypt(struct work_struct *work) kcryptd_crypt_write_convert(io); } -static void kcryptd_crypt_tasklet(unsigned long work) -{ - kcryptd_crypt((struct work_struct *)work); -} - static void kcryptd_queue_crypt(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2262,15 +2233,10 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) * irqs_disabled(): the kernel may run some IO completion from the idle thread, but * it is being executed with irqs disabled. */ - if (in_hardirq() || irqs_disabled()) { - io->in_tasklet = true; - tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); - tasklet_schedule(&io->tasklet); + if (!(in_hardirq() || irqs_disabled())) { + kcryptd_crypt(&io->work); return; } - - kcryptd_crypt(&io->work); - return; } INIT_WORK(&io->work, kcryptd_crypt); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 14e58ae705218..82662f5769c4a 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -645,23 +645,6 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(verity_verify_io(io))); } -static void verity_tasklet(unsigned long data) -{ - struct dm_verity_io *io = (struct dm_verity_io *)data; - int err; - - io->in_tasklet = true; - err = verity_verify_io(io); - if (err == -EAGAIN || err == -ENOMEM) { - /* fallback to retrying with work-queue */ - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - return; - } - - verity_finish_io(io, errno_to_blk_status(err)); -} - static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; @@ -674,13 +657,8 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_tasklet_enabled) && io->v->use_tasklet) { - tasklet_init(&io->tasklet, verity_tasklet, (unsigned long)io); - tasklet_schedule(&io->tasklet); - } else { - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); - } + INIT_WORK(&io->work, verity_work); + queue_work(io->v->verify_wq, &io->work); } /* diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f9d522c870e61..f3f6070084196 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -83,7 +83,6 @@ struct dm_verity_io { struct bvec_iter iter; struct work_struct work; - struct tasklet_struct tasklet; /* * Three variably-size fields follow this struct: -- GitLab From a60e6c3918d20848906ffcdfcf72ca6a8cfbcf2e Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 5 Dec 2023 17:36:01 +0100 Subject: [PATCH 0618/1405] Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU When closing the laptop lid with an external screen connected, the mouse pointer has a constant movement to the lower right corner. Opening the lid again stops this movement, but after that the touchpad does no longer register clicks. The touchpad is connected both via i2c-hid and PS/2, the predecessor of this device (NS70MU) has the same layout in this regard and also strange behaviour caused by the psmouse and the i2c-hid driver fighting over touchpad control. This fix is reusing the same workaround by just disabling the PS/2 aux port, that is only used by the touchpad, to give the i2c-hid driver the lone control over the touchpad. v2: Rebased on current master Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231205163602.16106-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index b585b1dab870e..cd45a65e17f2c 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1208,6 +1208,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NS5x_7xPU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), -- GitLab From de1034b38a346ef6be25fe8792f5d1e0684d5ff4 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:03 -0800 Subject: [PATCH 0619/1405] efi: runtime: Fix potential overflow of soft-reserved region size md_size will have been narrowed if we have >= 4GB worth of pages in a soft-reserved region. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/riscv-runtime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 83f5bb57fa4c4..83092d93f36a6 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index 09525fb5c240e..01f0f90ea4183 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md; for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res; if (!(md->attribute & EFI_MEMORY_SP)) -- GitLab From 0bcff59ef7a652fcdc6d535554b63278c2406c8f Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 2 Feb 2024 10:07:04 -0800 Subject: [PATCH 0620/1405] efi: Don't add memblocks for soft-reserved memory Adding memblocks for soft-reserved regions prevents them from later being hotplugged in by dax_kmem. Signed-off-by: Andrew Bresticker Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi-init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index d4987d0130801..a00e07b853f22 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -143,15 +143,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -196,6 +187,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT; if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size); if (!is_usable_memory(md)) -- GitLab From 4255447ad34c5c3785fcdcf76cfa0271d6e5ed39 Mon Sep 17 00:00:00 2001 From: Szilard Fabian Date: Fri, 2 Feb 2024 10:28:59 -0800 Subject: [PATCH 0621/1405] Input: i8042 - add Fujitsu Lifebook U728 to i8042 quirk table Another Fujitsu-related patch. In the initial boot stage the integrated keyboard of Fujitsu Lifebook U728 refuses to work and it's not possible to type for example a dm-crypt passphrase without the help of an external keyboard. i8042.nomux kernel parameter resolves this issue but using that a PS/2 mouse is detected. This input device is unused even when the i2c-hid-acpi kernel module is blacklisted making the integrated ELAN touchpad (04F3:3092) not working at all. So this notebook uses a hid-over-i2c touchpad which is managed by the i2c_designware input driver. Since you can't find a PS/2 mouse port on this computer and you can't connect a PS/2 mouse to it even with an official port replicator I think it's safe to not use the PS/2 mouse port at all. Signed-off-by: Szilard Fabian Link: https://lore.kernel.org/r/20240103014717.127307-2-szfabian@bluemarch.art Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index cd45a65e17f2c..dfc6c581873b7 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -634,6 +634,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOAUX) }, + { + /* Fujitsu Lifebook U728 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U728"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOAUX) + }, { /* Gigabyte M912 */ .matches = { -- GitLab From ba5e1272142d051dcc57ca1d3225ad8a089f9858 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Feb 2024 17:53:24 +0000 Subject: [PATCH 0622/1405] netdevsim: avoid potential loop in nsim_dev_trap_report_work() Many syzbot reports include the following trace [1] If nsim_dev_trap_report_work() can not grab the mutex, it should rearm itself at least one jiffie later. [1] Sending NMI from CPU 1 to CPUs 0: NMI backtrace for cpu 0 CPU: 0 PID: 32383 Comm: kworker/0:2 Not tainted 6.8.0-rc2-syzkaller-00031-g861c0981648f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Workqueue: events nsim_dev_trap_report_work RIP: 0010:bytes_is_nonzero mm/kasan/generic.c:89 [inline] RIP: 0010:memory_is_nonzero mm/kasan/generic.c:104 [inline] RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:129 [inline] RIP: 0010:memory_is_poisoned mm/kasan/generic.c:161 [inline] RIP: 0010:check_region_inline mm/kasan/generic.c:180 [inline] RIP: 0010:kasan_check_range+0x101/0x190 mm/kasan/generic.c:189 Code: 07 49 39 d1 75 0a 45 3a 11 b8 01 00 00 00 7c 0b 44 89 c2 e8 21 ed ff ff 83 f0 01 5b 5d 41 5c c3 48 85 d2 74 4f 48 01 ea eb 09 <48> 83 c0 01 48 39 d0 74 41 80 38 00 74 f2 eb b6 41 bc 08 00 00 00 RSP: 0018:ffffc90012dcf998 EFLAGS: 00000046 RAX: fffffbfff258af1e RBX: fffffbfff258af1f RCX: ffffffff8168eda3 RDX: fffffbfff258af1f RSI: 0000000000000004 RDI: ffffffff92c578f0 RBP: fffffbfff258af1e R08: 0000000000000000 R09: fffffbfff258af1e R10: ffffffff92c578f3 R11: ffffffff8acbcbc0 R12: 0000000000000002 R13: ffff88806db38400 R14: 1ffff920025b9f42 R15: ffffffff92c578e8 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000c00994e078 CR3: 000000002c250000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: instrument_atomic_read include/linux/instrumented.h:68 [inline] atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline] queued_spin_is_locked include/asm-generic/qspinlock.h:57 [inline] debug_spin_unlock kernel/locking/spinlock_debug.c:101 [inline] do_raw_spin_unlock+0x53/0x230 kernel/locking/spinlock_debug.c:141 __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:150 [inline] _raw_spin_unlock_irqrestore+0x22/0x70 kernel/locking/spinlock.c:194 debug_object_activate+0x349/0x540 lib/debugobjects.c:726 debug_work_activate kernel/workqueue.c:578 [inline] insert_work+0x30/0x230 kernel/workqueue.c:1650 __queue_work+0x62e/0x11d0 kernel/workqueue.c:1802 __queue_delayed_work+0x1bf/0x270 kernel/workqueue.c:1953 queue_delayed_work_on+0x106/0x130 kernel/workqueue.c:1989 queue_delayed_work include/linux/workqueue.h:563 [inline] schedule_delayed_work include/linux/workqueue.h:677 [inline] nsim_dev_trap_report_work+0x9c0/0xc80 drivers/net/netdevsim/dev.c:842 process_one_work+0x886/0x15d0 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2787 kthread+0x2c6/0x3a0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Fixes: 012ec02ae441 ("netdevsim: convert driver to use unlocked devlink API during init/fini") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201175324.3752746-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index b4d3b9cde8bd6..92a7a36b93ac0 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -835,14 +835,14 @@ static void nsim_dev_trap_report_work(struct work_struct *work) trap_report_dw.work); nsim_dev = nsim_trap_data->nsim_dev; - /* For each running port and enabled packet trap, generate a UDP - * packet with a random 5-tuple and report it. - */ if (!devl_trylock(priv_to_devlink(nsim_dev))) { - schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1); return; } + /* For each running port and enabled packet trap, generate a UDP + * packet with a random 5-tuple and report it. + */ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) { if (!netif_running(nsim_dev_port->ns->netdev)) continue; -- GitLab From 05519c86d6997cfb9bb6c82ce1595d1015b718dc Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Tue, 23 Jan 2024 22:12:20 +0000 Subject: [PATCH 0623/1405] KVM: x86/pmu: Fix type length error when reading pmu->fixed_ctr_ctrl Use a u64 instead of a u8 when taking a snapshot of pmu->fixed_ctr_ctrl when reprogramming fixed counters, as truncating the value results in KVM thinking fixed counter 2 is already disabled (the bug also affects fixed counters 3+, but KVM doesn't yet support those). As a result, if the guest disables fixed counter 2, KVM will get a false negative and fail to reprogram/disable emulation of the counter, which can leads to incorrect counts and spurious PMIs in the guest. Fixes: 76d287b2342e ("KVM: x86/pmu: Drop "u8 ctrl, int idx" for reprogram_fixed_counter()") Cc: stable@vger.kernel.org Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20240123221220.3911317-1-mizhang@google.com [sean: rewrite changelog to call out the effects of the bug] Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a6216c8747291..315c7c2ba89b1 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -71,7 +71,7 @@ static int fixed_pmc_events[] = { static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; -- GitLab From 2e7d3b67630dfd8f178c41fa2217aa00e79a5887 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 1 Feb 2024 10:47:51 +0100 Subject: [PATCH 0624/1405] net: atlantic: Fix DMA mapping for PTP hwts ring Function aq_ring_hwts_rx_alloc() maps extra AQ_CFG_RXDS_DEF bytes for PTP HWTS ring but then generic aq_ring_free() does not take this into account. Create and use a specific function to free HWTS ring to fix this issue. Trace: [ 215.351607] ------------[ cut here ]------------ [ 215.351612] DMA-API: atlantic 0000:4b:00.0: device driver frees DMA memory with different size [device address=0x00000000fbdd0000] [map size=34816 bytes] [unmap size=32768 bytes] [ 215.351635] WARNING: CPU: 33 PID: 10759 at kernel/dma/debug.c:988 check_unmap+0xa6f/0x2360 ... [ 215.581176] Call Trace: [ 215.583632] [ 215.585745] ? show_trace_log_lvl+0x1c4/0x2df [ 215.590114] ? show_trace_log_lvl+0x1c4/0x2df [ 215.594497] ? debug_dma_free_coherent+0x196/0x210 [ 215.599305] ? check_unmap+0xa6f/0x2360 [ 215.603147] ? __warn+0xca/0x1d0 [ 215.606391] ? check_unmap+0xa6f/0x2360 [ 215.610237] ? report_bug+0x1ef/0x370 [ 215.613921] ? handle_bug+0x3c/0x70 [ 215.617423] ? exc_invalid_op+0x14/0x50 [ 215.621269] ? asm_exc_invalid_op+0x16/0x20 [ 215.625480] ? check_unmap+0xa6f/0x2360 [ 215.629331] ? mark_lock.part.0+0xca/0xa40 [ 215.633445] debug_dma_free_coherent+0x196/0x210 [ 215.638079] ? __pfx_debug_dma_free_coherent+0x10/0x10 [ 215.643242] ? slab_free_freelist_hook+0x11d/0x1d0 [ 215.648060] dma_free_attrs+0x6d/0x130 [ 215.651834] aq_ring_free+0x193/0x290 [atlantic] [ 215.656487] aq_ptp_ring_free+0x67/0x110 [atlantic] ... [ 216.127540] ---[ end trace 6467e5964dd2640b ]--- [ 216.132160] DMA-API: Mapped at: [ 216.132162] debug_dma_alloc_coherent+0x66/0x2f0 [ 216.132165] dma_alloc_attrs+0xf5/0x1b0 [ 216.132168] aq_ring_hwts_rx_alloc+0x150/0x1f0 [atlantic] [ 216.132193] aq_ptp_ring_alloc+0x1bb/0x540 [atlantic] [ 216.132213] aq_nic_init+0x4a1/0x760 [atlantic] Fixes: 94ad94558b0f ("net: aquantia: add PTP rings infrastructure") Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201094752.883026-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index abd4832e4ed21..5acb3e16b5677 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -993,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1011,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index cda8597b4e146..f7433abd65915 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -919,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 52847310740a2..d627ace850ff5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -210,6 +210,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self); int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); -- GitLab From cb9f4a30fb85e1f4f149ada595a67899adb3db19 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:38 +0100 Subject: [PATCH 0625/1405] selftests: net: cut more slack for gro fwd tests. The udpgro_fwd.sh self-tests are somewhat unstable. There are a few timing constraints the we struggle to meet on very slow environments. Instead of skipping the whole tests in such envs, increase the test resilience WRT very slow hosts: increase the inter-packets timeouts, avoid resetting the counters every second and finally disable reduce the background traffic noise. Tested with: for I in $(seq 1 100); do ./tools/testing/selftests/kselftest_install/run_kselftest.sh \ -t net:udpgro_fwd.sh || exit -1 done in a slow environment. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/f4b6b11064a0d39182a9ae6a853abae3e9b4426a.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 14 ++++++++++++-- tools/testing/selftests/net/udpgso_bench_rx.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index d6b9c759043ca..9cd5e885e91f7 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -39,6 +39,10 @@ create_ns() { for ns in $NS_SRC $NS_DST; do ip netns add $ns ip -n $ns link set dev lo up + + # disable route solicitations to decrease 'noise' traffic + ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0 done ip link add name veth$SRC type veth peer name veth$DST @@ -80,6 +84,12 @@ create_vxlan_pair() { create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad done + + # preload neighbur cache, do avoid some noisy traffic + local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]') + local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]') + ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC + ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST } is_ipv6() { @@ -119,7 +129,7 @@ run_test() { # not enable GRO ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 - ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & + ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args & local spid=$! wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst @@ -168,7 +178,7 @@ run_bench() { # bind the sender and the receiver to different CPUs to try # get reproducible results ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" - ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 & local spid=$! wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index f35a924d4a303..1cbadd267c963 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -375,7 +375,7 @@ static void do_recv(void) do_flush_udp(fd); tnow = gettimeofday_ms(); - if (tnow > treport) { + if (!cfg_expected_pkt_nr && tnow > treport) { if (packets) fprintf(stderr, "%s rx: %6lu MB/s %8lu calls/s\n", -- GitLab From d75df752647728c6d65e594e5e4493448bea6cda Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:39 +0100 Subject: [PATCH 0626/1405] selftests: net: fix setup_ns usage in rtnetlink.sh The setup_ns helper marks the testns global variable as readonly. Later attempts to set such variable are unsuccessful, causing a couple test failures. Avoid completely the variable re-initialization and let the function access the global value. Fixes: e9ce7ededf14 ("selftests: rtnetlink: use setup_ns in bonding test") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/6e7c937c8ff73ca52a21a4a536a13a76ec0173a8.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 4667d74579d13..874a2952aa8ee 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -440,7 +440,6 @@ kci_test_encap_vxlan() local ret=0 vxlan="test-vxlan0" vlan="test-vlan0" - testns="$1" run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ dev "$devdummy" dstport 4789 if [ $? -ne 0 ]; then @@ -485,7 +484,6 @@ kci_test_encap_fou() { local ret=0 name="test-fou" - testns="$1" run_cmd_grep 'Usage: ip fou' ip fou help if [ $? -ne 0 ];then end_test "SKIP: fou: iproute2 too old" @@ -526,8 +524,8 @@ kci_test_encap() run_cmd ip -netns "$testns" link set lo up run_cmd ip -netns "$testns" link add name "$devdummy" type dummy run_cmd ip -netns "$testns" link set "$devdummy" up - run_cmd kci_test_encap_vxlan "$testns" - run_cmd kci_test_encap_fou "$testns" + run_cmd kci_test_encap_vxlan + run_cmd kci_test_encap_fou ip netns del "$testns" return $ret -- GitLab From e71e016ad0f6e641a7898b8cda5f62f8e2beb2f1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:40 +0100 Subject: [PATCH 0627/1405] selftests: net: fix tcp listener handling in pmtu.sh The pmtu.sh test uses a few TCP listener in a problematic way: It hard-codes a constant timeout to wait for the listener starting-up in background. That introduces unneeded latency and on very slow and busy host it can fail. Additionally the test starts again the same listener in the same namespace on the same port, just after the previous connection completed. Fast host can attempt starting the new server before the old one really closed the socket. Address the issues using the wait_local_port_listen helper and explicitly waiting for the background listener process exit. Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/f8e8f6d44427d8c45e9f6a71ee1a321047452087.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 3f118e3f1c66d..f0febc19baae5 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -199,6 +199,7 @@ # Same as above but with IPv6 source lib.sh +source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -1336,13 +1337,15 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { TCPDST="TCP:[${dst}]:50000" fi ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile & + local socat_pid=$! - sleep 1 + wait_local_port_listen ${NS_B} 50000 tcp dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} + wait ${socat_pid} [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 done -- GitLab From 691bb4e49c98a47bc643dd808453136ce78b15b4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 1 Feb 2024 19:42:41 +0100 Subject: [PATCH 0628/1405] selftests: net: avoid just another constant wait Using hard-coded constant timeout to wait for some expected event is deemed to fail sooner or later, especially in slow env. Our CI has spotted another of such race: # TEST: ipv6: cleanup of cached exceptions - nexthop objects [FAIL] # can't delete veth device in a timely manner, PMTU dst likely leaked Replace the crude sleep with a loop looking for the expected condition at low interval for a much longer range. Fixes: b3cc4f8a8a41 ("selftests: pmtu: add explicit tests for PMTU exceptions cleanup") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Link: https://lore.kernel.org/r/fd5c745e9bb665b724473af6a9373a8c2a62b247.1706812005.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f0febc19baae5..d65fdd407d73f 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1957,6 +1957,13 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" @@ -1987,11 +1994,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { -- GitLab From d7f5fb33cf77247b7bf9a871aaeea72ca4f51ad7 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Wed, 31 Jan 2024 21:14:13 +0100 Subject: [PATCH 0629/1405] tsnep: Fix mapping for zero copy XDP_TX action For XDP_TX action xdp_buff is converted to xdp_frame. The conversion is done by xdp_convert_buff_to_frame(). The memory type of the resulting xdp_frame depends on the memory type of the xdp_buff. For page pool based xdp_buff it produces xdp_frame with memory type MEM_TYPE_PAGE_POOL. For zero copy XSK pool based xdp_buff it produces xdp_frame with memory type MEM_TYPE_PAGE_ORDER0. tsnep_xdp_xmit_back() is not prepared for that and uses always the page pool buffer type TSNEP_TX_TYPE_XDP_TX. This leads to invalid mappings and the transmission of undefined data. Improve tsnep_xdp_xmit_back() to use the generic buffer type TSNEP_TX_TYPE_XDP_NDO for zero copy XDP_TX. Fixes: 3fc2333933fd ("tsnep: Add XDP socket zero-copy RX support") Signed-off-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/ethernet/engleder/tsnep_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 9aeff2b37a612..64eadd3207983 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -719,17 +719,25 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx) static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter, struct xdp_buff *xdp, - struct netdev_queue *tx_nq, struct tsnep_tx *tx) + struct netdev_queue *tx_nq, struct tsnep_tx *tx, + bool zc) { struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); bool xmit; + u32 type; if (unlikely(!xdpf)) return false; + /* no page pool for zero copy */ + if (zc) + type = TSNEP_TX_TYPE_XDP_NDO; + else + type = TSNEP_TX_TYPE_XDP_TX; + __netif_tx_lock(tx_nq, smp_processor_id()); - xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX); + xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, type); /* Avoid transmit queue timeout since we share it with the slow path */ if (xmit) @@ -1273,7 +1281,7 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog, case XDP_PASS: return false; case XDP_TX: - if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx)) + if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, false)) goto out_failure; *status |= TSNEP_XDP_TX; return true; @@ -1323,7 +1331,7 @@ static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog, case XDP_PASS: return false; case XDP_TX: - if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx)) + if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, true)) goto out_failure; *status |= TSNEP_XDP_TX; return true; -- GitLab From d75abeec401f8c86b470e7028a13fcdc87e5dd06 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 1 Feb 2024 09:38:15 +0100 Subject: [PATCH 0630/1405] tunnels: fix out of bounds access when building IPv6 PMTU error If the ICMPv6 error is built from a non-linear skb we get the following splat, BUG: KASAN: slab-out-of-bounds in do_csum+0x220/0x240 Read of size 4 at addr ffff88811d402c80 by task netperf/820 CPU: 0 PID: 820 Comm: netperf Not tainted 6.8.0-rc1+ #543 ... kasan_report+0xd8/0x110 do_csum+0x220/0x240 csum_partial+0xc/0x20 skb_tunnel_check_pmtu+0xeb9/0x3280 vxlan_xmit_one+0x14c2/0x4080 vxlan_xmit+0xf61/0x5c00 dev_hard_start_xmit+0xfb/0x510 __dev_queue_xmit+0x7cd/0x32a0 br_dev_queue_push_xmit+0x39d/0x6a0 Use skb_checksum instead of csum_partial who cannot deal with non-linear SKBs. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Antoine Tenart Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 586b1b3e35b80..80ccd6661aa32 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); -- GitLab From f3616173bf9be9bf39d131b120d6eea4e6324cb5 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:41:05 +0800 Subject: [PATCH 0631/1405] atm: idt77252: fix a memleak in open_card_ubr0 When alloc_scq fails, card->vcs[0] (i.e. vc) should be freed. Otherwise, in the following call chain: idt77252_init_one |-> idt77252_dev_open |-> open_card_ubr0 |-> alloc_scq [failed] |-> deinit_card |-> vfree(card->vcs); card->vcs is freed and card->vcs[0] is leaked. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhipeng Lu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/atm/idt77252.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e327a0229dc17..e7f713cd70d3f 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card) vc->scq = alloc_scq(card, vc->class); if (!vc->scq) { printk("%s: can't get SCQ.\n", card->name); + kfree(card->vcs[0]); + card->vcs[0] = NULL; return -ENOMEM; } -- GitLab From b09b58e31b0f43d76f79b9943da3fb7c2843dcbb Mon Sep 17 00:00:00 2001 From: Zhipeng Lu Date: Thu, 1 Feb 2024 20:47:13 +0800 Subject: [PATCH 0632/1405] octeontx2-pf: Fix a memleak otx2_sq_init When qmem_alloc and pfvf->hw_ops->sq_aq_init fails, sq->sg should be freed to prevent memleak. Fixes: c9c12d339d93 ("octeontx2-pf: Add support for PTP clock") Signed-off-by: Zhipeng Lu Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 7ca6941ea0b9b..02d0b707aea5b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -951,8 +951,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (pfvf->ptp && qidx < pfvf->hw.tx_queues) { err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, sizeof(*sq->timestamps)); - if (err) + if (err) { + kfree(sq->sg); + sq->sg = NULL; return err; + } } sq->head = 0; @@ -968,7 +971,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->stats.bytes = 0; sq->stats.pkts = 0; - return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); + if (err) { + kfree(sq->sg); + sq->sg = NULL; + return err; + } + + return 0; } -- GitLab From f03869698bc3bd6d9d2d9f216b20da08a8c2508a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 20 Dec 2023 01:02:42 +0100 Subject: [PATCH 0633/1405] arm64: dts: imx8mp: Disable UART4 by default on Data Modul i.MX8M Plus eDM SBC UART4 is used as CM7 coprocessor debug UART and may not be accessible from Linux in case it is protected by RDC. The RDC protection is set up by the platform firmware. UART4 is not used on this platform by Linux. Disable UART4 by default to prevent boot hangs, which occur when the RDC protection is in place. Fixes: 562d222f23f0 ("arm64: dts: imx8mp: Add support for Data Modul i.MX8M Plus eDM SBC") Signed-off-by: Marek Vasut Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts index d98a040860a48..5828c9d7821de 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-data-modul-edm-sbc.dts @@ -486,7 +486,7 @@ &uart4 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart4>; - status = "okay"; + status = "disabled"; }; &usb3_phy0 { -- GitLab From 54ce1927eb787f7bbb7ee664841c8f5932703f39 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 31 Jan 2024 15:55:38 -0800 Subject: [PATCH 0634/1405] cxl/cper: Fix errant CPER prints for CXL events Jonathan reports that CXL CPER events dump an extra generic error message. {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 {1}[Hardware Error]: event severity: recoverable {1}[Hardware Error]: Error 0, type: recoverable {1}[Hardware Error]: section type: unknown, fbcd0a77-c260-417f-85a9-088b1621eba6 {1}[Hardware Error]: section length: 0x90 {1}[Hardware Error]: 00000000: 00000090 00000007 00000000 0d938086 ................ {1}[Hardware Error]: 00000010: 00100000 00000000 00040000 00000000 ................ ... CXL events were rerouted though the CXL subsystem for additional processing. However, when that work was done it was missed that cper_estatus_print_section() continued with a generic error message which is confusing. Teach CPER print code to ignore printing details of some section types. Assign the CXL event GUIDs to this set to prevent confusing unknown prints. Reported-by: Jonathan Cameron Suggested-by: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Signed-off-by: Ard Biesheuvel --- drivers/acpi/apei/ghes.c | 26 -------------------------- drivers/firmware/efi/cper.c | 19 +++++++++++++++++++ include/linux/cper.h | 23 +++++++++++++++++++++++ 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7b7c605166e0c..fe825a432c5bf 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -680,32 +680,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, static DECLARE_RWSEM(cxl_cper_rw_sem); static cxl_cper_callback cper_callback; -/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ - -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CPER_SEC_CXL_GEN_MEDIA_GUID \ - GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ - 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) - -/* - * DRAM Event Record - * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 - */ -#define CPER_SEC_CXL_DRAM_GUID \ - GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ - 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -#define CPER_SEC_CXL_MEM_MODULE_GUID \ - GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ - 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) - static void cxl_cper_post_event(enum cxl_event_type event_type, struct cxl_cper_event_rec *rec) { diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 35c37f667781c..9b3884ff81e69 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -523,6 +523,17 @@ static void cper_print_tstamp(const char *pfx, } } +struct ignore_section { + guid_t guid; + const char *name; +}; + +static const struct ignore_section ignore_sections[] = { + { .guid = CPER_SEC_CXL_GEN_MEDIA_GUID, .name = "CXL General Media Event" }, + { .guid = CPER_SEC_CXL_DRAM_GUID, .name = "CXL DRAM Event" }, + { .guid = CPER_SEC_CXL_MEM_MODULE_GUID, .name = "CXL Memory Module Event" }, +}; + static void cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no) @@ -543,6 +554,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); snprintf(newpfx, sizeof(newpfx), "%s ", pfx); + + for (int i = 0; i < ARRAY_SIZE(ignore_sections); i++) { + if (guid_equal(sec_type, &ignore_sections[i].guid)) { + printk("%ssection_type: %s\n", newpfx, ignore_sections[i].name); + return; + } + } + if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); diff --git a/include/linux/cper.h b/include/linux/cper.h index c1a7dc3251215..265b0f8fc0b3c 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -90,6 +90,29 @@ enum { GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ +/* + * General Media Event Record + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 + */ +#define CPER_SEC_CXL_GEN_MEDIA_GUID \ + GUID_INIT(0xfbcd0a77, 0xc260, 0x417f, \ + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6) +/* + * DRAM Event Record + * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44 + */ +#define CPER_SEC_CXL_DRAM_GUID \ + GUID_INIT(0x601dcbb3, 0x9c06, 0x4eab, \ + 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24) +/* + * Memory Module Event Record + * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 + */ +#define CPER_SEC_CXL_MEM_MODULE_GUID \ + GUID_INIT(0xfe927475, 0xdd59, 0x4339, \ + 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74) + /* * Flags bits definitions for flags in struct cper_record_header * If set, the error has been recovered -- GitLab From dbea519d6878c298dd0f48e6ec2dbacebe4bbb2a Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 31 Jan 2024 15:55:39 -0800 Subject: [PATCH 0635/1405] cxl/trace: Remove unnecessary memcpy's CPER events don't have UUIDs. Therefore UUIDs were removed from the records passed to trace events and replaced with hard coded values. As pointed out by Jonathan, the new defines for the UUIDs present a more efficient way to assign UUID in trace records.[1] Replace memcpy's with the use of static data. [1] https://lore.kernel.org/all/20240108132325.00000e9c@Huawei.com/ Suggested-by: Jonathan Cameron Signed-off-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Signed-off-by: Ard Biesheuvel --- drivers/cxl/core/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index 89445435303aa..bdf117a33744b 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -338,7 +338,7 @@ TRACE_EVENT(cxl_general_media, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); - memcpy(&__entry->hdr_uuid, &CXL_EVENT_GEN_MEDIA_UUID, sizeof(uuid_t)); + __entry->hdr_uuid = CXL_EVENT_GEN_MEDIA_UUID; /* General Media */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -425,7 +425,7 @@ TRACE_EVENT(cxl_dram, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); - memcpy(&__entry->hdr_uuid, &CXL_EVENT_DRAM_UUID, sizeof(uuid_t)); + __entry->hdr_uuid = CXL_EVENT_DRAM_UUID; /* DRAM */ __entry->dpa = le64_to_cpu(rec->phys_addr); @@ -573,7 +573,7 @@ TRACE_EVENT(cxl_memory_module, TP_fast_assign( CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr); - memcpy(&__entry->hdr_uuid, &CXL_EVENT_MEM_MODULE_UUID, sizeof(uuid_t)); + __entry->hdr_uuid = CXL_EVENT_MEM_MODULE_UUID; /* Memory Module Event */ __entry->event_type = rec->event_type; -- GitLab From bd97cea7b18a0a553773af806dfbfac27a7c4acb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 31 Jan 2024 17:38:46 -0600 Subject: [PATCH 0636/1405] RDMA/irdma: Fix KASAN issue with tasklet KASAN testing revealed the following issue assocated with freeing an IRQ. [50006.466686] Call Trace: [50006.466691] [50006.489538] dump_stack+0x5c/0x80 [50006.493475] print_address_description.constprop.6+0x1a/0x150 [50006.499872] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.505742] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.511644] kasan_report.cold.11+0x7f/0x118 [50006.516572] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.522473] irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.528232] irdma_process_ceq+0xb2/0x400 [irdma] [50006.533601] ? irdma_hw_flush_wqes_callback+0x370/0x370 [irdma] [50006.540298] irdma_ceq_dpc+0x44/0x100 [irdma] [50006.545306] tasklet_action_common.isra.14+0x148/0x2c0 [50006.551096] __do_softirq+0x1d0/0xaf8 [50006.555396] irq_exit_rcu+0x219/0x260 [50006.559670] irq_exit+0xa/0x20 [50006.563320] smp_apic_timer_interrupt+0x1bf/0x690 [50006.568645] apic_timer_interrupt+0xf/0x20 [50006.573341] The issue is that a tasklet could be pending on another core racing the delete of the irq. Fix by insuring any scheduled tasklet is killed after deleting the irq. Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-2-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index bd4b2b8964444..2f8d18d8be3b7 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -570,6 +570,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** -- GitLab From ee107186bcfd25d7873258f3f75440e20f5e6416 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 31 Jan 2024 17:38:47 -0600 Subject: [PATCH 0637/1405] RDMA/irdma: Validate max_send_wr and max_recv_wr Validate that max_send_wr and max_recv_wr is within the supported range. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Change-Id: I2fc8b10292b641fddd20b36986a9dae90a93f4be Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-3-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index b5eb8d421988c..cb828e3da478e 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -839,7 +839,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { -- GitLab From 666047f3ece9f991774c1fe9b223139a9ef8908d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:48 -0600 Subject: [PATCH 0638/1405] RDMA/irdma: Set the CQ read threshold for GEN 1 The CQ shadow read threshold is currently not set for GEN 2. This could cause an invalid CQ overflow condition, so remove the GEN check that exclused GEN 1. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-4-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index cb828e3da478e..0b046c061742b 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2186,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); -- GitLab From 630bdb6f28ca9e5ff79e244030170ac788478332 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 31 Jan 2024 17:38:49 -0600 Subject: [PATCH 0639/1405] RDMA/irdma: Add AE for too many RNRS Add IRDMA_AE_LLP_TOO_MANY_RNRS to the list of AE's processed as an abnormal asyncronous event. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Link: https://lore.kernel.org/r/20240131233849.400285-5-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 8fb752f2eda29..2cb4b96db7212 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -346,6 +346,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2f8d18d8be3b7..ad50b77282f8a 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -387,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: -- GitLab From e6f57c6881916df39db7d95981a8ad2b9c3458d6 Mon Sep 17 00:00:00 2001 From: Daniel Vacek Date: Thu, 1 Feb 2024 09:10:08 +0100 Subject: [PATCH 0640/1405] IB/hfi1: Fix sdma.h tx->num_descs off-by-one error Unfortunately the commit `fd8958efe877` introduced another error causing the `descs` array to overflow. This reults in further crashes easily reproducible by `sendmsg` system call. [ 1080.836473] general protection fault, probably for non-canonical address 0x400300015528b00a: 0000 [#1] PREEMPT SMP PTI [ 1080.869326] RIP: 0010:hfi1_ipoib_build_ib_tx_headers.constprop.0+0xe1/0x2b0 [hfi1] -- [ 1080.974535] Call Trace: [ 1080.976990] [ 1081.021929] hfi1_ipoib_send_dma_common+0x7a/0x2e0 [hfi1] [ 1081.027364] hfi1_ipoib_send_dma_list+0x62/0x270 [hfi1] [ 1081.032633] hfi1_ipoib_send+0x112/0x300 [hfi1] [ 1081.042001] ipoib_start_xmit+0x2a9/0x2d0 [ib_ipoib] [ 1081.046978] dev_hard_start_xmit+0xc4/0x210 -- [ 1081.148347] __sys_sendmsg+0x59/0xa0 crash> ipoib_txreq 0xffff9cfeba229f00 struct ipoib_txreq { txreq = { list = { next = 0xffff9cfeba229f00, prev = 0xffff9cfeba229f00 }, descp = 0xffff9cfeba229f40, coalesce_buf = 0x0, wait = 0xffff9cfea4e69a48, complete = 0xffffffffc0fe0760 , packet_len = 0x46d, tlen = 0x0, num_desc = 0x0, desc_limit = 0x6, next_descq_idx = 0x45c, coalesce_idx = 0x0, flags = 0x0, descs = {{ qw = {0x8024000120dffb00, 0x4} # SDMA_DESC0_FIRST_DESC_FLAG (bit 63) }, { qw = { 0x3800014231b108, 0x4} }, { qw = { 0x310000e4ee0fcf0, 0x8} }, { qw = { 0x3000012e9f8000, 0x8} }, { qw = { 0x59000dfb9d0000, 0x8} }, { qw = { 0x78000e02e40000, 0x8} }} }, sdma_hdr = 0x400300015528b000, <<< invalid pointer in the tx request structure sdma_status = 0x0, SDMA_DESC0_LAST_DESC_FLAG (bit 62) complete = 0x0, priv = 0x0, txq = 0xffff9cfea4e69880, skb = 0xffff9d099809f400 } If an SDMA send consists of exactly 6 descriptors and requires dword padding (in the 7th descriptor), the sdma_txreq descriptor array is not properly expanded and the packet will overflow into the container structure. This results in a panic when the send completion runs. The exact panic varies depending on what elements of the container structure get corrupted. The fix is to use the correct expression in _pad_sdma_tx_descs() to test the need to expand the descriptor array. With this patch the crashes are no longer reproducible and the machine is stable. Fixes: fd8958efe877 ("IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors") Cc: stable@vger.kernel.org Reported-by: Mats Kronberg Tested-by: Mats Kronberg Signed-off-by: Daniel Vacek Link: https://lore.kernel.org/r/20240201081009.1109442-1-neelx@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6e5ac2023328a..b67d23b1f2862 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); -- GitLab From a41f91b4da1490b90ae5859f3464e94d418fea2c Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 1 Feb 2024 20:11:03 +0800 Subject: [PATCH 0641/1405] arm64: dts: rockchip: aliase sdmmc as mmc1 for Cool Pi 4B Follow others rk3588 based boards, and u-boot only use mmc0/1 as mmc boot targets, so aliase sdmmc as mmc1. Fixes: 3f5d336d64d6 ("arm64: dts: rockchip: Add support for rk3588s based board Cool Pi 4B") Signed-off-by: Andy Yan Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20240201121106.1471301-1-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts index ef4f058c20ff1..e037bf9db75af 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-coolpi-4b.dts @@ -19,8 +19,8 @@ aliases { mmc0 = &sdhci; - mmc1 = &sdio; - mmc2 = &sdmmc; + mmc1 = &sdmmc; + mmc2 = &sdio; }; analog-sound { -- GitLab From cebda3dd36bebb11d3e6df4d1944a1cdcf3cd4cf Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 1 Feb 2024 20:11:04 +0800 Subject: [PATCH 0642/1405] arm64: dts: rockchip: aliase sdmmc as mmc1 for Cool Pi CM5 EVB Follow others rk3588 based boards, and u-boot only use mmc0/1 as mmc boot targets, so aliase sdmmc as mmc1. Fixes: 791c154c3982 ("arm64: dts: rockchip: Add support for rk3588 based board Cool Pi CM5 EVB") Signed-off-by: Andy Yan Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20240201121106.1471301-2-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi index 0b02f4d6e0033..cce1c8e835877 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi @@ -16,8 +16,8 @@ aliases { mmc0 = &sdhci; - mmc1 = &sdio; - mmc2 = &sdmmc; + mmc1 = &sdmmc; + mmc2 = &sdio; serial2 = &uart2; }; -- GitLab From c7e8dbb3bc12f389d617e6c0e9537f11a1fa6eb1 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 1 Feb 2024 20:11:05 +0800 Subject: [PATCH 0643/1405] arm64: dts: rockchip: rename vcc5v0_usb30_host regulator for Cool Pi CM5 EVB According to the schematic, USB20 HOST0 and HOST1 each have their own independent power supply, but these two regulators controlled by a same GPIO, so give it a more appropriate name. Fixes: 791c154c3982 ("arm64: dts: rockchip: Add support for rk3588 based board Cool Pi CM5 EVB") Signed-off-by: Andy Yan Reviewed-by: Dragan Simic Link: https://lore.kernel.org/r/20240201121106.1471301-3-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts index d4c70835e0fe2..609f35ee4b0ee 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts @@ -72,7 +72,7 @@ vin-supply = <&vcc3v3_sys>; }; - vcc5v0_usb30_host: vcc5v0-usb30-host-regulator { + vcc5v0_usb_host1: vcc5v0_usb_host2: vcc5v0-usb-host-regulator { compatible = "regulator-fixed"; regulator-name = "vcc5v0_host"; regulator-boot-on; @@ -188,12 +188,12 @@ }; &u2phy2_host { - phy-supply = <&vcc5v0_usb30_host>; + phy-supply = <&vcc5v0_usb_host1>; status = "okay"; }; &u2phy3_host { - phy-supply = <&vcc5v0_usb30_host>; + phy-supply = <&vcc5v0_usb_host2>; status = "okay"; }; -- GitLab From 5556a8c3af8b4ee648b0fd4cdb4a1356d92de07b Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 1 Feb 2024 20:11:06 +0800 Subject: [PATCH 0644/1405] arm64: dts: rockchip: Fix the num-lanes of pcie3x4 on Cool Pi CM5 EVB The 4 lane pcie30 phy is shared by pcie3x4 and pcie3x2, so the num-lanes of pcie3x4 should be 2. Fixes: 791c154c3982 ("arm64: dts: rockchip: Add support for rk3588 based board Cool Pi CM5 EVB") Signed-off-by: Andy Yan Link: https://lore.kernel.org/r/20240201121106.1471301-4-andyshrk@163.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts index 609f35ee4b0ee..a4946cdc3bb34 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-evb.dts @@ -114,6 +114,7 @@ status = "okay"; }; +/* Standard pcie */ &pcie3x2 { reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_sys>; @@ -122,6 +123,7 @@ /* M.2 M-Key ssd */ &pcie3x4 { + num-lanes = <2>; reset-gpios = <&gpio4 RK_PB6 GPIO_ACTIVE_HIGH>; vpcie3v3-supply = <&vcc3v3_sys>; status = "okay"; -- GitLab From f98643d8daf3443e3b414a82d0cb3d745f8c8bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 29 Jan 2024 12:32:02 +0100 Subject: [PATCH 0645/1405] ARM: dts: rockchip: Drop interrupts property from pwm-rockchip nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The binding doesn't define interrupts and adding such a definition was refused because it's unclear how they should ever be used and the relevant registers are outside the PWM range. So drop them fixing several dtbs_check warnings like: arch/arm/boot/dts/rockchip/rv1108-elgin-r1.dtb: pwm@10280030: 'interrupts' does not match any of the regexes: 'pinctrl-[0-9]+' from schema $id: http://devicetree.org/schemas/pwm/pwm-rockchip.yaml# Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240129113205.2453029-2-u.kleine-koenig@pengutronix.de Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rv1108.dtsi | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rv1108.dtsi b/arch/arm/boot/dts/rockchip/rv1108.dtsi index abf3006f0a842..f3291f3bbc6fd 100644 --- a/arch/arm/boot/dts/rockchip/rv1108.dtsi +++ b/arch/arm/boot/dts/rockchip/rv1108.dtsi @@ -196,7 +196,6 @@ pwm4: pwm@10280000 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280000 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -208,7 +207,6 @@ pwm5: pwm@10280010 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280010 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -220,7 +218,6 @@ pwm6: pwm@10280020 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280020 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -232,7 +229,6 @@ pwm7: pwm@10280030 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x10280030 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -386,7 +382,6 @@ pwm0: pwm@20040000 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040000 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -398,7 +393,6 @@ pwm1: pwm@20040010 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040010 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -410,7 +404,6 @@ pwm2: pwm@20040020 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040020 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; @@ -422,7 +415,6 @@ pwm3: pwm@20040030 { compatible = "rockchip,rv1108-pwm", "rockchip,rk3288-pwm"; reg = <0x20040030 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM0_PMU>, <&cru PCLK_PWM0_PMU>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; -- GitLab From bab7ec1d80fe5998ce5ab4480f15e7cce43e2cdf Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Mon, 29 Jan 2024 12:48:51 +0100 Subject: [PATCH 0646/1405] arm64: dts: rockchip: drop unneeded status from rk3588-jaguar gpio-leds The default status is okay, so it is definitly not necessary to set it on a newly added node. Fixes: d1b8b36a2cc5 ("arm64: dts: rockchip: add Theobroma Jaguar SBC") Signed-off-by: Heiko Stuebner Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240129114851.2019861-1-heiko@sntech.de --- arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts index 4ce70fb75a307..39d65002add1e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-jaguar.dts @@ -62,7 +62,6 @@ compatible = "gpio-leds"; pinctrl-names = "default"; pinctrl-0 = <&led1_pin>; - status = "okay"; /* LED1 on PCB */ led-1 { -- GitLab From 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Feb 2024 12:20:36 +0000 Subject: [PATCH 0647/1405] Linux 6.8-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 113f7c762f0a0..a171eafce2a3b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- GitLab From 42dfa94d802a48c871e2017cbf86153270c86632 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 4 Feb 2024 16:43:05 +0900 Subject: [PATCH 0648/1405] KVM: arm64: Do not source virt/lib/Kconfig twice For ARCH=arm64, virt/lib/Kconfig is sourced twice, from arch/arm64/kvm/Kconfig and from drivers/vfio/Kconfig. There is no good reason to parse virt/lib/Kconfig twice. Commit 2412405b3141 ("KVM: arm/arm64: register irq bypass consumer on ARM/ARM64") should not have added this 'source' directive. Signed-off-by: Masahiro Yamada Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240204074305.31492-1-masahiroy@kernel.org --- arch/arm64/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 6c3c8ca73e7fd..27ca89b628a02 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,7 +3,6 @@ # KVM configuration # -source "virt/lib/Kconfig" source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION -- GitLab From 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Thu, 1 Feb 2024 20:40:38 -0800 Subject: [PATCH 0649/1405] hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER handler cannot perform VF register successfully as the register call is received before netvsc_probe is finished. This is because we register register_netdevice_notifier() very early( even before vmbus_driver_register()). To fix this, we try to register each such matching VF( if it is visible as a netdevice) at the end of netvsc_probe. Cc: stable@vger.kernel.org Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") Suggested-by: Dexuan Cui Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Dexuan Cui Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 273bd8a20122c..11831a1c97623 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,6 +42,10 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of 4K pages)"); @@ -2185,7 +2189,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2208,7 +2212,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. + */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2346,7 +2354,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2386,7 +2394,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2484,10 +2492,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2599,6 +2628,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). + */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2754,28 +2807,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: -- GitLab From 621c6257128149e45b36ffb973a01c3f3461b893 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 4 Feb 2024 04:56:17 -0800 Subject: [PATCH 0650/1405] iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP When als_capture_sample() is called with usage ID HID_USAGE_SENSOR_TIME_TIMESTAMP, return 0. The HID sensor core ignores the return value for capture_sample() callback, so return value doesn't make difference. But correct the return value to return success instead of -EINVAL. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20240204125617.2635574-1-srinivas.pandruvada@linux.intel.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/hid-sensor-als.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5cd27f04b45e6..b6c4bef2a7bb2 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; -- GitLab From 1168491e7f53581ba7b6014a39a49cfbbb722feb Mon Sep 17 00:00:00 2001 From: Loic Prylli Date: Fri, 3 Nov 2023 11:30:55 +0100 Subject: [PATCH 0651/1405] hwmon: (aspeed-pwm-tacho) mutex for tach reading the ASPEED_PTCR_RESULT Register can only hold the result for a single fan input. Adding a mutex to protect the register until the reading is done. Signed-off-by: Loic Prylli Signed-off-by: Alexander Hansen Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Link: https://lore.kernel.org/r/121d888762a1232ef403cf35230ccf7b3887083a.1699007401.git.alexander.hansen@9elements.com Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index f6e1e55e82922..4acc1858d8acf 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -195,6 +195,8 @@ struct aspeed_pwm_tacho_data { u8 fan_tach_ch_source[MAX_ASPEED_FAN_TACH_CHANNELS]; struct aspeed_cooling_device *cdev[8]; const struct attribute_group *groups[3]; + /* protects access to shared ASPEED_PTCR_RESULT */ + struct mutex tach_lock; }; enum type { TYPEM, TYPEN, TYPEO }; @@ -529,6 +531,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch_source, type, mode, both; int ret; + mutex_lock(&priv->tach_lock); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); @@ -546,6 +550,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, ASPEED_RPM_STATUS_SLEEP_USEC, usec); + mutex_unlock(&priv->tach_lock); + /* return -ETIMEDOUT if we didn't get an answer. */ if (ret) return ret; @@ -915,6 +921,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + mutex_init(&priv->tach_lock); priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, &aspeed_pwm_tacho_regmap_config); if (IS_ERR(priv->regmap)) -- GitLab From 4e440abc894585a34c2904a32cd54af1742311b3 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:34 +0800 Subject: [PATCH 0652/1405] hwmon: (coretemp) Fix out-of-bounds memory access Fix a bug that pdata->cpu_map[] is set before out-of-bounds check. The problem might be triggered on systems with more than 128 cores per package. Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") Signed-off-by: Zhang Rui Cc: Link: https://lore.kernel.org/r/20240202092144.71180-2-rui.zhang@intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index ba82d1e79c131..e78c769191118 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -509,18 +509,14 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, if (pkg_flag) { attr_no = PKG_SYSFS_ATTR_NO; } else { - index = ida_alloc(&pdata->ida, GFP_KERNEL); + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; + pdata->cpu_map[index] = topology_core_id(cpu); attr_no = index + BASE_SYSFS_ATTR_NO; } - if (attr_no > MAX_CORE_DATA - 1) { - err = -ERANGE; - goto ida_free; - } - tdata = init_temp_data(cpu, pkg_flag); if (!tdata) { err = -ENOMEM; -- GitLab From fdaf0c8629d4524a168cb9e4ad4231875749b28c Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:35 +0800 Subject: [PATCH 0653/1405] hwmon: (coretemp) Fix bogus core_id to attr name mapping Before commit 7108b80a542b ("hwmon/coretemp: Handle large core ID value"), there is a fixed mapping between 1. cpu_core_id 2. the index in pdata->core_data[] array 3. the sysfs attr name, aka "tempX_" The later two always equal cpu_core_id + 2. After the commit, pdata->core_data[] index is got from ida so that it can handle sparse core ids and support more cores within a package. However, the commit erroneously maps the sysfs attr name to pdata->core_data[] index instead of cpu_core_id + 2. As a result, the code is not aligned with the comments, and brings user visible changes in hwmon sysfs on systems with sparse core id. For example, before commit 7108b80a542b ("hwmon/coretemp: Handle large core ID value"), /sys/class/hwmon/hwmon2/temp2_label:Core 0 /sys/class/hwmon/hwmon2/temp3_label:Core 1 /sys/class/hwmon/hwmon2/temp4_label:Core 2 /sys/class/hwmon/hwmon2/temp5_label:Core 3 /sys/class/hwmon/hwmon2/temp6_label:Core 4 /sys/class/hwmon/hwmon3/temp10_label:Core 8 /sys/class/hwmon/hwmon3/temp11_label:Core 9 after commit, /sys/class/hwmon/hwmon2/temp2_label:Core 0 /sys/class/hwmon/hwmon2/temp3_label:Core 1 /sys/class/hwmon/hwmon2/temp4_label:Core 2 /sys/class/hwmon/hwmon2/temp5_label:Core 3 /sys/class/hwmon/hwmon2/temp6_label:Core 4 /sys/class/hwmon/hwmon2/temp7_label:Core 8 /sys/class/hwmon/hwmon2/temp8_label:Core 9 Restore the previous behavior and rework the code, comments and variable names to avoid future confusions. Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-3-rui.zhang@intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index e78c769191118..95f4c0b00b2d8 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -419,7 +419,7 @@ static ssize_t show_temp(struct device *dev, } static int create_core_attrs(struct temp_data *tdata, struct device *dev, - int attr_no) + int index) { int i; static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, @@ -431,13 +431,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, }; for (i = 0; i < tdata->attr_size; i++) { + /* + * We map the attr number to core id of the CPU + * The attr number is always core id + 2 + * The Pkgtemp will always show up as temp1_*, if available + */ + int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; + snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, "temp%d_%s", attr_no, suffixes[i]); sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; tdata->sd_attrs[i].dev_attr.attr.mode = 0444; tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; - tdata->sd_attrs[i].index = attr_no; + tdata->sd_attrs[i].index = index; tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; } tdata->attr_group.attrs = tdata->attrs; @@ -495,26 +502,25 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, struct platform_data *pdata = platform_get_drvdata(pdev); struct cpuinfo_x86 *c = &cpu_data(cpu); u32 eax, edx; - int err, index, attr_no; + int err, index; if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) return 0; /* - * Find attr number for sysfs: - * We map the attr number to core id of the CPU - * The attr number is always core id + 2 - * The Pkgtemp will always show up as temp1_*, if available + * Get the index of tdata in pdata->core_data[] + * tdata for package: pdata->core_data[1] + * tdata for core: pdata->core_data[2] .. pdata->core_data[NUM_REAL_CORES + 1] */ if (pkg_flag) { - attr_no = PKG_SYSFS_ATTR_NO; + index = PKG_SYSFS_ATTR_NO; } else { index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; pdata->cpu_map[index] = topology_core_id(cpu); - attr_no = index + BASE_SYSFS_ATTR_NO; + index += BASE_SYSFS_ATTR_NO; } tdata = init_temp_data(cpu, pkg_flag); @@ -540,20 +546,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, if (get_ttarget(tdata, &pdev->dev) >= 0) tdata->attr_size++; - pdata->core_data[attr_no] = tdata; + pdata->core_data[index] = tdata; /* Create sysfs interfaces */ - err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no); + err = create_core_attrs(tdata, pdata->hwmon_dev, index); if (err) goto exit_free; return 0; exit_free: - pdata->core_data[attr_no] = NULL; + pdata->core_data[index] = NULL; kfree(tdata); ida_free: if (!pkg_flag) - ida_free(&pdata->ida, index); + ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO); return err; } -- GitLab From 34cf8c657cf0365791cdc658ddbca9cc907726ce Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 2 Feb 2024 17:21:36 +0800 Subject: [PATCH 0654/1405] hwmon: (coretemp) Enlarge per package core count limit Currently, coretemp driver supports only 128 cores per package. This loses some core temperature information on systems that have more than 128 cores per package. [ 58.685033] coretemp coretemp.0: Adding Core 128 failed [ 58.692009] coretemp coretemp.0: Adding Core 129 failed ... Enlarge the limitation to 512 because there are platforms with more than 256 cores per package. Signed-off-by: Zhang Rui Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 95f4c0b00b2d8..b8fc8d1ef20df 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -41,7 +41,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius"); #define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1) -- GitLab From 862cf85fef85becc55a173387527adb4f076fab0 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: [PATCH 0655/1405] iio: commom: st_sensors: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa Cc: Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a6471..f9ae5cdd884f5 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER -- GitLab From 4cb81840d8f29b66d9d05c6d7f360c9560f7e2f4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 31 Jan 2024 16:52:46 -0600 Subject: [PATCH 0656/1405] iio: accel: bma400: Fix a compilation problem The kernel fails when compiling without `CONFIG_REGMAP_I2C` but with `CONFIG_BMA400`. ``` ld: drivers/iio/accel/bma400_i2c.o: in function `bma400_i2c_probe': bma400_i2c.c:(.text+0x23): undefined reference to `__devm_regmap_init_i2c' ``` Link: https://download.01.org/0day-ci/archive/20240131/202401311634.FE5CBVwe-lkp@intel.com/config Fixes: 465c811f1f20 ("iio: accel: Add driver for the BMA400") Fixes: 9bea10642396 ("iio: accel: bma400: add support for bma400 spi") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240131225246.14169-1-mario.limonciello@amd.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 91adcac875a41..c9d7afe489e83 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL -- GitLab From eef00a82c568944f113f2de738156ac591bbd5cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 09:54:04 +0000 Subject: [PATCH 0657/1405] inet: read sk->sk_family once in inet_recv_error() inet_recv_error() is called without holding the socket lock. IPv6 socket could mutate to IPv4 with IPV6_ADDRFORM socket option and trigger a KCSAN warning. Fixes: f4713a3dfad0 ("net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4e635dd3d3c8c..a5a820ee20266 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1628,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; -- GitLab From a40f93e9286968863c661f2b9e314d97adc2f84e Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Thu, 1 Feb 2024 17:48:05 -0800 Subject: [PATCH 0658/1405] interconnect: qcom: sm8650: Use correct ACV enable_mask The ACV enable_mask is historically BIT(3), but it's BIT(0) on this target. Fix it. Fixes: c062bcab5924 ("interconnect: qcom: introduce RPMh Network-On-Chip Interconnect on SM8650 SoC") Signed-off-by: Mike Tipton Reviewed-by: Konrad Dybcio Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240202014806.7876-2-quic_mdtipton@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/sm8650.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/sm8650.c b/drivers/interconnect/qcom/sm8650.c index b83de54577b68..b962e6c233ef7 100644 --- a/drivers/interconnect/qcom/sm8650.c +++ b/drivers/interconnect/qcom/sm8650.c @@ -1160,7 +1160,7 @@ static struct qcom_icc_node qns_gemnoc_sf = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", - .enable_mask = BIT(3), + .enable_mask = BIT(0), .num_nodes = 1, .nodes = { &ebi }, }; -- GitLab From 5464e7acea4a6c56b3c5c2d7aeef2eda92227b33 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Thu, 1 Feb 2024 17:48:06 -0800 Subject: [PATCH 0659/1405] interconnect: qcom: x1e80100: Add missing ACV enable_mask The ACV BCM is voted using bitmasks. Add the proper mask for this target. Fixes: 9f196772841e ("interconnect: qcom: Add X1E80100 interconnect provider driver") Signed-off-by: Mike Tipton Reviewed-by: Konrad Dybcio Tested-by: Abel Vesa Link: https://lore.kernel.org/r/20240202014806.7876-3-quic_mdtipton@quicinc.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/x1e80100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c index d19501d913b39..cbaf4f9c41be6 100644 --- a/drivers/interconnect/qcom/x1e80100.c +++ b/drivers/interconnect/qcom/x1e80100.c @@ -1586,6 +1586,7 @@ static struct qcom_icc_node qns_pcie_south_gem_noc_pcie = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", + .enable_mask = BIT(3), .num_nodes = 1, .nodes = { &ebi }, }; -- GitLab From 1a00897e5e96c29b21580dfcfec168dc16c67469 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 4 Aug 2023 12:05:44 +0800 Subject: [PATCH 0660/1405] drm/i915: Replace dead 01.org link 01.org is dead so replace old gvt link with current wiki page. Acked-by: Jani Nikula Reviewed-by: Zhi Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20230804040544.1972958-1-zhenyuw@linux.intel.com --- MAINTAINERS | 2 +- drivers/gpu/drm/i915/Kconfig | 2 +- drivers/gpu/drm/i915/intel_gvt.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..03fbe24bf40d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10804,7 +10804,7 @@ M: Zhi Wang L: intel-gvt-dev@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org S: Supported -W: https://01.org/igvt-g +W: https://github.com/intel/gvt-linux/wiki T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index b5d6e3352071f..3089029abba48 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -140,7 +140,7 @@ config DRM_I915_GVT_KVMGT Note that this driver only supports newer device from Broadwell on. For further information and setup guide, you can visit: - http://01.org/igvt-g. + https://github.com/intel/gvt-linux/wiki. If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index e98b6d69a91ab..9b6d87c8b5831 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -41,7 +41,7 @@ * To virtualize GPU resources GVT-g driver depends on hypervisor technology * e.g KVM/VFIO/mdev, Xen, etc. to provide resource access trapping capability * and be virtualized within GVT-g device module. More architectural design - * doc is available on https://01.org/group/2230/documentation-list. + * doc is available on https://github.com/intel/gvt-linux/wiki. */ static LIST_HEAD(intel_gvt_devices); -- GitLab From 47caa96478b99d6d1199b89467cc3e5a6cc754ee Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:41:47 +0300 Subject: [PATCH 0661/1405] drm/i915/gvt: Fix uninitialized variable in handle_mmio() This code prints the wrong variable in the warning message. It should print "i" instead of "info->offset". On the first iteration "info" is uninitialized leading to a crash and on subsequent iterations it prints the previous offset instead of the current one. Fixes: e0f74ed4634d ("i915/gvt: Separate the MMIO tracking table from GVT-g") Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/11957c20-b178-4027-9b0a-e32e9591dd7c@moroto.mountain Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 90f6c1ece57d4..efcb00472be24 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2849,8 +2849,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, for (i = start; i < end; i += 4) { p = intel_gvt_find_mmio_info(gvt, i); if (p) { - WARN(1, "dup mmio definition offset %x\n", - info->offset); + WARN(1, "dup mmio definition offset %x\n", i); /* We return -EEXIST here to make GVT-g load fail. * So duplicated MMIO can be found as soon as -- GitLab From 44e4192f88978e32e4ac08b27141f3767366f79b Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 30 Jan 2024 21:27:43 +0000 Subject: [PATCH 0662/1405] MAINTAINERS: Update Zhi Wang's email address Update my email address to zhi.wang.linux@gmail.com. CC: Zhenyu Wang Acked-by: Zhenyu Wang Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20240130212743.7727-1-zhi.wang.linux@gmail.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 03fbe24bf40d8..052149d304baa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10800,7 +10800,7 @@ F: drivers/gpio/gpio-tangier.h INTEL GVT-g DRIVERS (Intel GPU Virtualization) M: Zhenyu Wang -M: Zhi Wang +M: Zhi Wang L: intel-gvt-dev@lists.freedesktop.org L: intel-gfx@lists.freedesktop.org S: Supported -- GitLab From 8ded03ae48b3657e0fbca99d8a9d8fa1bfb8c9be Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 2 Feb 2024 18:26:46 -0600 Subject: [PATCH 0663/1405] powerpc/pseries/papr-sysparm: use u8 arrays for payloads Some PAPR system parameter values are formatted by firmware as nul-terminated strings (e.g. LPAR name, shared processor attributes). But the values returned for other parameters, such as processor module info and TLB block invalidate characteristics, are binary data with parameter-specific layouts. So char[] isn't the appropriate type for the general case. Use u8/__u8. Signed-off-by: Nathan Lynch Fixes: 905b9e48786e ("powerpc/pseries/papr-sysparm: Expose character device to user space") Signed-off-by: Michael Ellerman Link: https://msgid.link/20240202-papr-sysparm-ioblock-data-use-u8-v1-1-f5c6c89f65ec@linux.ibm.com --- arch/powerpc/include/asm/papr-sysparm.h | 2 +- arch/powerpc/include/uapi/asm/papr-sysparm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h index 0dbbff59101d6..c3cd5b131033e 100644 --- a/arch/powerpc/include/asm/papr-sysparm.h +++ b/arch/powerpc/include/asm/papr-sysparm.h @@ -32,7 +32,7 @@ typedef struct { */ struct papr_sysparm_buf { __be16 len; - char val[PAPR_SYSPARM_MAX_OUTPUT]; + u8 val[PAPR_SYSPARM_MAX_OUTPUT]; }; struct papr_sysparm_buf *papr_sysparm_buf_alloc(void); diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h index 9f9a0f267ea57..f733467b1534e 100644 --- a/arch/powerpc/include/uapi/asm/papr-sysparm.h +++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h @@ -14,7 +14,7 @@ enum { struct papr_sysparm_io_block { __u32 parameter; __u16 length; - char data[PAPR_SYSPARM_MAX_OUTPUT]; + __u8 data[PAPR_SYSPARM_MAX_OUTPUT]; }; /** -- GitLab From dd839f31d7cd5e04f4111a219024268c6f6973f0 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 4 Feb 2024 08:51:52 +0100 Subject: [PATCH 0664/1405] bcachefs: install fd later to avoid race with close Calling fd_install() makes a file reachable for userland, including the possibility to close the file descriptor, which leads to calling its 'release' hook. If that happens before the code had a chance to bump the reference of the newly created task struct, the release callback will call put_task_struct() too early, leading to the premature destruction of the kernel thread. Avoid that race by calling fd_install() later, after all the setup is done. Fixes: 1c6fdbd8f246 ("bcachefs: Initial commit") Signed-off-by: Mathias Krause Signed-off-by: Kent Overstreet --- fs/bcachefs/thread_with_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index b1c867aa2b58e..9220d7de10db6 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -53,9 +53,9 @@ int bch2_run_thread_with_file(struct thread_with_file *thr, if (ret) goto err; - fd_install(fd, file); get_task_struct(thr->task); wake_up_process(thr->task); + fd_install(fd, file); return fd; err: if (fd >= 0) -- GitLab From 7b508b323b2ec45be59769bd4e4aeba729c52cf6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 Feb 2024 21:01:02 -0500 Subject: [PATCH 0665/1405] bcachefs: time_stats: Check for last_event == 0 when updating freq stats This fixes spurious outliers in the frequency stats. Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 56b815fd9fc6e..231003b405efc 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -418,14 +418,15 @@ static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, bch2_quantiles_update(&stats->quantiles, duration); } - if (time_after64(end, stats->last_event)) { + if (stats->last_event && time_after64(end, stats->last_event)) { freq = end - stats->last_event; mean_and_variance_update(&stats->freq_stats, freq); mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); stats->max_freq = max(stats->max_freq, freq); stats->min_freq = min(stats->min_freq, freq); - stats->last_event = end; } + + stats->last_event = end; } static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, -- GitLab From 4639c5021029d49fd2f97fa8d74731f167f98919 Mon Sep 17 00:00:00 2001 From: bo liu Date: Mon, 5 Feb 2024 09:38:02 +0800 Subject: [PATCH 0666/1405] ALSA: hda/conexant: Add quirk for SWS JS201D The SWS JS201D need a different pinconfig from windows driver. Add a quirk to use a specific pinconfig to SWS JS201D. Signed-off-by: bo liu Cc: Link: https://lore.kernel.org/r/20240205013802.51907-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8819e8a98763..e8209178d87bb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -344,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; -- GitLab From fddab35fd064414c677e9488c4fb3a1f67725d37 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 5 Feb 2024 15:22:52 +0800 Subject: [PATCH 0667/1405] ALSA: hda/realtek: add IDs for Dell dual spk platform This patch adds another two IDs for the Dell dual speaker platform. Signed-off-by: Shuming Fan Cc: Link: https://lore.kernel.org/r/20240205072252.3791500-1-shumingf@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6994c4c5073cb..e045d3e76a45e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9737,7 +9737,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0c0b, "Dell Oasis 14 RPL-P", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0c0e, "Dell Oasis 16", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), -- GitLab From fdfa083549de5d50ebf7f6811f33757781e838c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 4 Feb 2024 16:42:07 -0800 Subject: [PATCH 0668/1405] RDMA/srpt: Support specifying the srpt_service_guid parameter Make loading ib_srpt with this parameter set work. The current behavior is that setting that parameter while loading the ib_srpt kernel module triggers the following kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: parse_one+0x18c/0x1d0 parse_args+0xe1/0x230 load_module+0x8de/0xa60 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x181/0x240 __x64_sys_finit_module+0x5a/0xb0 do_syscall_64+0x5f/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Cc: LiHonggang Reported-by: LiHonggang Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240205004207.17031-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 58f70cfec45a7..d2dce6ce30a94 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); -- GitLab From a620a7f2ae8b08c5beea6369f61e87064ee222dc Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:08:49 +0100 Subject: [PATCH 0669/1405] arm64: dts: tqma8mpql: fix audio codec iov-supply IOVDD is supplied by 1.8V, fix the referenced regulator. Fixes: d8f9d8126582d ("arm64: dts: imx8mp: Add analog audio output on i.MX8MP TQMa8MPxL/MBa8MPxL") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts index a2d5d19b2de0c..86d3da36e4f3e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql-mba8mpxl.dts @@ -184,6 +184,13 @@ enable-active-high; }; + reg_vcc_1v8: regulator-1v8 { + compatible = "regulator-fixed"; + regulator-name = "VCC_1V8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + reg_vcc_3v3: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "VCC_3V3"; @@ -480,7 +487,7 @@ clock-names = "mclk"; clocks = <&audio_blk_ctrl IMX8MP_CLK_AUDIOMIX_SAI3_MCLK1>; reset-gpios = <&gpio4 29 GPIO_ACTIVE_LOW>; - iov-supply = <®_vcc_3v3>; + iov-supply = <®_vcc_1v8>; ldoin-supply = <®_vcc_3v3>; }; -- GitLab From f954785a124e77d4e6bb52cab689a8de447999aa Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 13 Jan 2024 19:43:32 -0300 Subject: [PATCH 0670/1405] Revert "arm64: dts: imx8mp-dhcom-pdk3: Describe the USB-C connector" This reverts commit a4dca89fe8a1585af73e362f5f4e3189a00abf8e. Marek reported: "This patch breaks USB-C port on this board. If I plug in USB-C storage device, it is not detected. If I revert this patch, it is detected. Please drop this patch for now." Revert it to avoid the regression. Reported-by: Marek Vasut Closes: https://lore.kernel.org/linux-arm-kernel/623c294a-5c53-4e01-acbf-104acc180e14@denx.de/T/#u Fixes: a4dca89fe8a1 ("arm64: dts: imx8mp-dhcom-pdk3: Describe the USB-C connector") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts index fea67a9282f03..b749e28e5ede5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-pdk3.dts @@ -175,14 +175,10 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ptn5150>; - connector { - compatible = "usb-c-connector"; - label = "USB-C"; - - port { - ptn5150_out_ep: endpoint { - remote-endpoint = <&dwc3_0_ep>; - }; + port { + + ptn5150_out_ep: endpoint { + remote-endpoint = <&dwc3_0_ep>; }; }; }; -- GitLab From 690085d866f08cc72ae4d601821564a0b63e32f3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 13 Jan 2024 19:43:33 -0300 Subject: [PATCH 0671/1405] Revert "arm64: dts: imx8mn-var-som-symphony: Describe the USB-C connector" This reverts commit 095b96b2b8c6dcabf40bbfe4bb99a95fe55c7463. Marek reported the similar change in imx8mp-dhcom-pdk3.dts caused a regression: "This patch breaks USB-C port on this board. If I plug in USB-C storage device, it is not detected. If I revert this patch, it is detected. Please drop this patch for now." Revert it here as well. Fixes: 095b96b2b8c6 ("arm64: dts: imx8mn-var-som-symphony: Describe the USB-C connector") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mn-var-som-symphony.dts | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts index f38ee2266b25d..a6b94d1957c92 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som-symphony.dts @@ -128,14 +128,9 @@ pinctrl-0 = <&pinctrl_ptn5150>; status = "okay"; - connector { - compatible = "usb-c-connector"; - label = "USB-C"; - - port { - typec1_dr_sw: endpoint { - remote-endpoint = <&usb1_drd_sw>; - }; + port { + typec1_dr_sw: endpoint { + remote-endpoint = <&usb1_drd_sw>; }; }; }; -- GitLab From 1ad55cecf22f05f1c884adf63cc09d3c3e609ebf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Feb 2024 09:11:07 +0100 Subject: [PATCH 0672/1405] x86/efistub: Use 1:1 file:memory mapping for PE/COFF .compat section The .compat section is a dummy PE section that contains the address of the 32-bit entrypoint of the 64-bit kernel image if it is bootable from 32-bit firmware (i.e., CONFIG_EFI_MIXED=y) This section is only 8 bytes in size and is only referenced from the loader, and so it is placed at the end of the memory view of the image, to avoid the need for padding it to 4k, which is required for sections appearing in the middle of the image. Unfortunately, this violates the PE/COFF spec, and even if most EFI loaders will work correctly (including the Tianocore reference implementation), PE loaders do exist that reject such images, on the basis that both the file and memory views of the file contents should be described by the section headers in a monotonically increasing manner without leaving any gaps. So reorganize the sections to avoid this issue. This results in a slight padding overhead (< 4k) which can be avoided if desired by disabling CONFIG_EFI_MIXED (which is only needed in rare cases these days) Fixes: 3e3eabe26dc8 ("x86/boot: Increase section and file alignment to 4k/512") Reported-by: Mike Beaton Link: https://lkml.kernel.org/r/CAHzAAWQ6srV6LVNdmfbJhOwhBw5ZzxxZZ07aHt9oKkfYAdvuQQ%40mail.gmail.com Signed-off-by: Ard Biesheuvel --- arch/x86/boot/header.S | 14 ++++++-------- arch/x86/boot/setup.ld | 6 +++--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b2771710ed989..a1bbedd989e42 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -106,8 +106,7 @@ extra_header_fields: .word 0 # MinorSubsystemVersion .long 0 # Win32VersionValue - .long setup_size + ZO__end + pecompat_vsize - # SizeOfImage + .long setup_size + ZO__end # SizeOfImage .long salign # SizeOfHeaders .long 0 # CheckSum @@ -143,7 +142,7 @@ section_table: .ascii ".setup" .byte 0 .byte 0 - .long setup_size - salign # VirtualSize + .long pecompat_fstart - salign # VirtualSize .long salign # VirtualAddress .long pecompat_fstart - salign # SizeOfRawData .long salign # PointerToRawData @@ -156,8 +155,8 @@ section_table: #ifdef CONFIG_EFI_MIXED .asciz ".compat" - .long 8 # VirtualSize - .long setup_size + ZO__end # VirtualAddress + .long pecompat_fsize # VirtualSize + .long pecompat_fstart # VirtualAddress .long pecompat_fsize # SizeOfRawData .long pecompat_fstart # PointerToRawData @@ -172,17 +171,16 @@ section_table: * modes this image supports. */ .pushsection ".pecompat", "a", @progbits - .balign falign - .set pecompat_vsize, salign + .balign salign .globl pecompat_fstart pecompat_fstart: .byte 0x1 # Version .byte 8 # Size .word IMAGE_FILE_MACHINE_I386 # PE machine type .long setup_size + ZO_efi32_pe_entry # Entrypoint + .byte 0x0 # Sentinel .popsection #else - .set pecompat_vsize, 0 .set pecompat_fstart, setup_size #endif .ascii ".text" diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 83bb7efad8ae7..3a2d1360abb01 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -24,6 +24,9 @@ SECTIONS .text : { *(.text .text.*) } .text32 : { *(.text32) } + .pecompat : { *(.pecompat) } + PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); + . = ALIGN(16); .rodata : { *(.rodata*) } @@ -36,9 +39,6 @@ SECTIONS . = ALIGN(16); .data : { *(.data*) } - .pecompat : { *(.pecompat) } - PROVIDE(pecompat_fsize = setup_size - pecompat_fstart); - .signature : { setup_sig = .; LONG(0x5a5aaa55) -- GitLab From b3d4f7f2288901ed2392695919b3c0e24c1b4084 Mon Sep 17 00:00:00 2001 From: Daniel Basilio Date: Fri, 2 Feb 2024 13:37:17 +0200 Subject: [PATCH 0673/1405] nfp: use correct macro for LengthSelect in BAR config The 1st and 2nd expansion BAR configuration registers are configured, when the driver starts up, in variables 'barcfg_msix_general' and 'barcfg_msix_xpb', respectively. The 'LengthSelect' field is ORed in from bit 0, which is incorrect. The 'LengthSelect' field should start from bit 27. This has largely gone un-noticed because NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT happens to be 0. Fixes: 4cb584e0ee7d ("nfp: add CPP access core") Cc: stable@vger.kernel.org # 4.11+ Signed-off-by: Daniel Basilio Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 33b4c28563162..3f10c5365c80e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -537,11 +537,13 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) const u32 barcfg_msix_general = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_GENERAL) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT; + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT); const u32 barcfg_msix_xpb = NFP_PCIE_BAR_PCIE2CPP_MapType( NFP_PCIE_BAR_PCIE2CPP_MapType_BULK) | - NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT | + NFP_PCIE_BAR_PCIE2CPP_LengthSelect( + NFP_PCIE_BAR_PCIE2CPP_LengthSelect_32BIT) | NFP_PCIE_BAR_PCIE2CPP_Target_BaseAddress( NFP_CPP_TARGET_ISLAND_XPB); const u32 barcfg_explicit[4] = { -- GitLab From 1a1c13303ff6d64e6f718dc8aa614e580ca8d9b4 Mon Sep 17 00:00:00 2001 From: Daniel de Villiers Date: Fri, 2 Feb 2024 13:37:18 +0200 Subject: [PATCH 0674/1405] nfp: flower: prevent re-adding mac index for bonded port When physical ports are reset (either through link failure or manually toggled down and up again) that are slaved to a Linux bond with a tunnel endpoint IP address on the bond device, not all tunnel packets arriving on the bond port are decapped as expected. The bond dev assigns the same MAC address to itself and each of its slaves. When toggling a slave device, the same MAC address is therefore offloaded to the NFP multiple times with different indexes. The issue only occurs when re-adding the shared mac. The nfp_tunnel_add_shared_mac() function has a conditional check early on that checks if a mac entry already exists and if that mac entry is global: (entry && nfp_tunnel_is_mac_idx_global(entry->index)). In the case of a bonded device (For example br-ex), the mac index is obtained, and no new index is assigned. We therefore modify the conditional in nfp_tunnel_add_shared_mac() to check if the port belongs to the LAG along with the existing checks to prevent a new global mac index from being re-assigned to the slave port. Fixes: 20cce8865098 ("nfp: flower: enable MAC address sharing for offloadable devs") CC: stable@vger.kernel.org # 5.1+ Signed-off-by: Daniel de Villiers Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index e522845c7c211..0d7d138d6e0d7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -1084,7 +1084,7 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); - if (entry && nfp_tunnel_is_mac_idx_global(entry->index)) { + if (entry && (nfp_tunnel_is_mac_idx_global(entry->index) || netif_is_lag_port(netdev))) { if (entry->bridge_count || !nfp_flower_is_supported_bridge(netdev)) { nfp_tunnel_offloaded_macs_inc_ref_and_link(entry, -- GitLab From 0f4d6f011bca0df2051532b41b596366aa272019 Mon Sep 17 00:00:00 2001 From: James Hershaw Date: Fri, 2 Feb 2024 13:37:19 +0200 Subject: [PATCH 0675/1405] nfp: enable NETDEV_XDP_ACT_REDIRECT feature flag Enable previously excluded xdp feature flag for NFD3 devices. This feature flag is required in order to bind nfp interfaces to an xdp socket and the nfp driver does in fact support the feature. Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Cc: stable@vger.kernel.org # 6.3+ Signed-off-by: James Hershaw Signed-off-by: Louis Peens Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 3b3210d823e80..f28e769e6fdad 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2776,6 +2776,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn) case NFP_NFD_VER_NFD3: netdev->netdev_ops = &nfp_nfd3_netdev_ops; netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; + netdev->xdp_features |= NETDEV_XDP_ACT_REDIRECT; break; case NFP_NFD_VER_NFDK: netdev->netdev_ops = &nfp_nfdk_netdev_ops; -- GitLab From ed8b94f6e0acd652ce69bd69d678a0c769172df8 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Mon, 22 Jan 2024 16:24:07 -0600 Subject: [PATCH 0676/1405] powerpc/pseries/iommu: Fix iommu initialisation during DLPAR add When a PCI device is dynamically added, the kernel oopses with a NULL pointer dereference: BUG: Kernel NULL pointer dereference on read at 0x00000030 Faulting instruction address: 0xc0000000006bbe5c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs xsk_diag bonding nft_compat nf_tables nfnetlink rfkill binfmt_misc dm_multipath rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c mlx5_core mlxfw sd_mod t10_pi sg tls ibmvscsi ibmveth scsi_transport_srp vmx_crypto pseries_wdt psample dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 17 PID: 2685 Comm: drmgr Not tainted 6.7.0-203405+ #66 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c0000000006bbe5c LR: c000000000a13e68 CTR: c0000000000579f8 REGS: c00000009924f240 TRAP: 0300 Not tainted (6.7.0-203405+) MSR: 8000000000009033 CR: 24002220 XER: 20040006 CFAR: c000000000a13e64 DAR: 0000000000000030 DSISR: 40000000 IRQMASK: 0 ... NIP sysfs_add_link_to_group+0x34/0x94 LR iommu_device_link+0x5c/0x118 Call Trace: iommu_init_device+0x26c/0x318 (unreliable) iommu_device_link+0x5c/0x118 iommu_init_device+0xa8/0x318 iommu_probe_device+0xc0/0x134 iommu_bus_notifier+0x44/0x104 notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 bus_notify+0x50/0x7c device_add+0x640/0x918 pci_device_add+0x23c/0x298 of_create_pci_dev+0x400/0x884 of_scan_pci_dev+0x124/0x1b0 __of_scan_bus+0x78/0x18c pcibios_scan_phb+0x2a4/0x3b0 init_phb_dynamic+0xb8/0x110 dlpar_add_slot+0x170/0x3b8 [rpadlpar_io] add_slot_store.part.0+0xb4/0x130 [rpadlpar_io] kobj_attr_store+0x2c/0x48 sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x350/0x4a0 ksys_write+0x84/0x140 system_call_exception+0x124/0x330 system_call_vectored_common+0x15c/0x2ec Commit a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") broke DLPAR add of PCI devices. The above added iommu_device structure to pci_controller. During system boot, PCI devices are discovered and this newly added iommu_device structure is initialized by a call to iommu_device_register(). During DLPAR add of a PCI device, a new pci_controller structure is allocated but there are no calls made to iommu_device_register() interface. Fix is to register the iommu device during DLPAR add as well. Fixes: a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") Signed-off-by: Gaurav Batra [mpe: Trim oops and tweak some change log wording] Signed-off-by: Michael Ellerman Link: https://msgid.link/20240122222407.39603-1-gbatra@linux.ibm.com --- arch/powerpc/include/asm/ppc-pci.h | 3 +++ arch/powerpc/kernel/iommu.c | 21 ++++++++++++++++----- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ce2b1b5eebddc..c3a3f3df36d16 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -29,6 +29,9 @@ void *pci_traverse_device_nodes(struct device_node *start, void *(*fn)(struct device_node *, void *), void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +extern void ppc_iommu_register_device(struct pci_controller *phb); +extern void ppc_iommu_unregister_device(struct pci_controller *phb); + /* From rtas_pci.h */ extern void init_pci_config_tokens (void); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd4629..c6f62e130d55c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1388,6 +1388,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1398,11 +1413,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - iommu_device_sysfs_add(&hose->iommu, hose->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - hose->global_number); - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, - hose->parent); + ppc_iommu_register_device(hose); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4ba8245681192..4448386268d99 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); + ppc_iommu_register_device(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + ppc_iommu_unregister_device(phb); + pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ -- GitLab From aad98efd0b121f63a2e1c221dcb4d4850128c697 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Fri, 2 Feb 2024 21:13:16 +0530 Subject: [PATCH 0677/1405] powerpc/64: Set task pt_regs->link to the LR value on scv entry Nysal reported that userspace backtraces are missing in offcputime bcc tool. As an example: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write - python (9107) 8 write - sudo (9105) 9 mmap - python (9107) 16 clock_nanosleep - multipathd (697) 3001604 The offcputime bcc tool attaches a bpf program to a kprobe on finish_task_switch(), which is usually hit on a syscall from userspace. With the switch to system call vectored, we started setting pt_regs->link to zero. This is because system call vectored behaves like a function call with LR pointing to the system call return address, and with no modification to SRR0/SRR1. The LR value does indicate our next instruction, so it is being saved as pt_regs->nip, and pt_regs->link is being set to zero. This is not a problem by itself, but BPF uses perf callchain infrastructure for capturing stack traces, and that stores LR as the second entry in the stack trace. perf has code to cope with the second entry being zero, and skips over it. However, generic userspace unwinders assume that a zero entry indicates end of the stack trace, resulting in a truncated userspace stack trace. Rather than fixing all userspace unwinders to ignore/skip past the second entry, store the real LR value in pt_regs->link so that there continues to be a valid, though duplicate entry in the stack trace. With this change: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write write [unknown] [unknown] [unknown] [unknown] [unknown] PyObject_VectorcallMethod [unknown] [unknown] PyObject_CallOneArg PyFile_WriteObject PyFile_WriteString [unknown] [unknown] PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 7 write write [unknown] sudo_ev_loop_v1 sudo_ev_dispatch_v1 [unknown] [unknown] [unknown] [unknown] __libc_start_main - sudo (1291) 7 syscall syscall bpf_open_perf_buffer_opts [unknown] [unknown] [unknown] [unknown] _PyObject_MakeTpCall PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 11 clock_nanosleep clock_nanosleep nanosleep sleep [unknown] [unknown] __clone - multipathd (698) 3001661 Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org Reported-by: "Nysal Jan K.A" Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20240202154316.395276-1-naveen@kernel.org --- arch/powerpc/kernel/interrupt_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index bd863702d8121..1ad059a9e2fef 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr -- GitLab From f09696279b5dd1770a3de2e062f1c5d1449213ff Mon Sep 17 00:00:00 2001 From: R Nageswara Sastry Date: Wed, 31 Jan 2024 18:38:59 +0530 Subject: [PATCH 0678/1405] selftests/powerpc/papr_vpd: Check devfd before get_system_loc_code() Calling get_system_loc_code before checking devfd and errno fails the test when the device is not available, the expected behaviour is a SKIP. Change the order of 'SKIP_IF_MSG' to correctly SKIP when the /dev/ papr-vpd device is not available. Test output before: Test FAILED on line 271 Test output after: [SKIP] Test skipped on line 266: /dev/papr-vpd not present Signed-off-by: R Nageswara Sastry Signed-off-by: Michael Ellerman Link: https://msgid.link/20240131130859.14968-1-rnsastry@linux.ibm.com --- tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c index 98cbb9109ee6e..505294da1b9fb 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -263,10 +263,10 @@ static int papr_vpd_system_loc_code(void) off_t size; int fd; - SKIP_IF_MSG(get_system_loc_code(&lc), - "Cannot determine system location code"); SKIP_IF_MSG(devfd < 0 && errno == ENOENT, DEVPATH " not present"); + SKIP_IF_MSG(get_system_loc_code(&lc), + "Cannot determine system location code"); FAIL_IF(devfd < 0); -- GitLab From a038a3ff8c6582404834852c043dadc73a5b68b4 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Wed, 24 Jan 2024 11:38:38 +0100 Subject: [PATCH 0679/1405] powerpc/6xx: set High BAT Enable flag on G2_LE cores MMU_FTR_USE_HIGH_BATS is set for G2_LE cores and derivatives like e300cX, but the high BATs need to be enabled in HID2 to work. Add register definitions and add the needed setup to __setup_cpu_603. This fixes boot on CPUs like the MPC5200B with STRICT_KERNEL_RWX enabled on systems where the flag has not been set by the bootloader already. Fixes: e4d6654ebe6e ("powerpc/mm/32s: rework mmu_mapin_ram()") Signed-off-by: Matthias Schiffer Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240124103838.43675-1-matthias.schiffer@ew.tq-group.com --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kernel/cpu_setup_6xx.S | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fd09f25452d4..bb47af9054a95 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -617,6 +617,8 @@ #endif #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ +#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */ +#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f29ce3dd6140f..bfd3f442e5eb9 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -26,6 +26,15 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches + + /* + * This assumes that all cores using __setup_cpu_603 with + * MMU_FTR_USE_HIGH_BATS are G2_LE compatible + */ +BEGIN_MMU_FTR_SECTION + bl setup_g2_le_hid2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + mtlr r5 blr _GLOBAL(__setup_cpu_604) @@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0) blr SYM_FUNC_END(setup_604_hid0) +/* Enable high BATs for G2_LE and derivatives like e300cX */ +SYM_FUNC_START_LOCAL(setup_g2_le_hid2) + mfspr r11,SPRN_HID2_G2_LE + oris r11,r11,HID2_G2_LE_HBE@h + mtspr SPRN_HID2_G2_LE,r11 + sync + isync + blr +SYM_FUNC_END(setup_g2_le_hid2) + /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. * Moto MPC710CE.pdf describes them, those are errata @@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup) mtcr r7 blr _ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) - -- GitLab From 4a7aee96200ad281a5cc4cf5c7a2e2a49d2b97b0 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Tue, 23 Jan 2024 09:45:59 +0800 Subject: [PATCH 0680/1405] powerpc/kasan: Fix addr error caused by page alignment In kasan_init_region, when k_start is not page aligned, at the begin of for loop, k_cur = k_start & PAGE_MASK is less than k_start, and then `va = block + k_cur - k_start` is less than block, the addr va is invalid, because the memory address space from va to block is not alloced by memblock_alloc, which will not be reserved by memblock_reserve later, it will be used by other places. As a result, memory overwriting occurs. for example: int __init __weak kasan_init_region(void *start, size_t size) { [...] /* if say block(dcd97000) k_start(feef7400) k_end(feeff3fe) */ block = memblock_alloc(k_end - k_start, PAGE_SIZE); [...] for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { /* at the begin of for loop * block(dcd97000) va(dcd96c00) k_cur(feef7000) k_start(feef7400) * va(dcd96c00) is less than block(dcd97000), va is invalid */ void *va = block + k_cur - k_start; [...] } [...] } Therefore, page alignment is performed on k_start before memblock_alloc() to ensure the validity of the VA address. Fixes: 663c0c9496a6 ("powerpc/kasan: Fix shadow area set up for modules.") Signed-off-by: Jiangfeng Xiao Signed-off-by: Michael Ellerman Link: https://msgid.link/1705974359-43790-1-git-send-email-xiaojiangfeng@huawei.com --- arch/powerpc/mm/kasan/init_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d9357..aa9aa11927b2f 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; -- GitLab From f31041417bf7f4a4df8b3bfb52cb31bbe805b934 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:13 +0000 Subject: [PATCH 0681/1405] rxrpc: Fix generation of serial numbers to skip zero In the Rx protocol, every packet generated is marked with a per-connection monotonically increasing serial number. This number can be referenced in an ACK packet generated in response to an incoming packet - thereby allowing the sender to use this for RTT determination, amongst other things. However, if the reference field in the ACK is zero, it doesn't refer to any incoming packet (it could be a ping to find out if a packet got lost, for example) - so we shouldn't generate zero serial numbers. Fix the generation of serial numbers to retry if it comes up with a zero. Furthermore, since the serial numbers are only ever allocated within the I/O thread this connection is bound to, there's no need for atomics so remove that too. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 16 +++++++++++++++- net/rxrpc/conn_event.c | 2 +- net/rxrpc/output.c | 8 ++++---- net/rxrpc/proc.c | 2 +- net/rxrpc/rxkad.c | 4 ++-- 5 files changed, 23 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index dbeb75c298573..31b0dd8c9b2d7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -510,7 +510,7 @@ struct rxrpc_connection { enum rxrpc_call_completion completion; /* Completion condition */ s32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ - atomic_t serial; /* packet serial number counter */ + rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 service_id; /* Service ID, possibly upgraded */ u32 security_level; /* Security level selected */ @@ -822,6 +822,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) #include +/* + * Allocate the next serial number on a connection. 0 must be skipped. + */ +static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn) +{ + rxrpc_serial_t serial; + + serial = conn->tx_serial; + if (serial == 0) + serial = 1; + conn->tx_serial = serial + 1; + return serial; +} + /* * af_rxrpc.c */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 95f4bc206b3dc..ec5eae60ab0c4 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -117,7 +117,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[2].iov_base = &ack_info; iov[2].iov_len = sizeof(ack_info); - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid | channel); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a0906145e8293..4a292f860ae37 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; len = iov[0].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(serial); trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(txb->ack.firstPacket), @@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); pkt.whdr.serial = htonl(serial); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); @@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) _enter("%x,{%d}", txb->seq, txb->len); /* Each transmission of a Tx packet needs a new serial number */ - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(serial); if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && @@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn) len = iov[0].iov_len + iov[1].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 6c86cbb98d1d6..26dc2f26d92d8 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -181,7 +181,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) atomic_read(&conn->active), state, key_serial(conn->key), - atomic_read(&conn->serial), + conn->tx_serial, conn->hi_serial, conn->channels[0].call_id, conn->channels[1].call_id, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index b52dedcebce0a..6b32d61d4cdc4 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) len = iov[0].iov_len + iov[1].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); @@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); rxrpc_local_dont_fragment(conn->local, false); -- GitLab From e7870cf13d20f56bfc19f9c3e89707c69cf104ef Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:14 +0000 Subject: [PATCH 0682/1405] rxrpc: Fix delayed ACKs to not set the reference serial number Fix the construction of delayed ACKs to not set the reference serial number as they can't be used as an RTT reference. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_event.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 31b0dd8c9b2d7..b4ab26c3718a4 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -696,7 +696,6 @@ struct rxrpc_call { /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ u16 ackr_sack_base; /* Starting slot in SACK table ring */ - rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_window; /* Base of SACK window */ rxrpc_seq_t ackr_wtop; /* Base of SACK window */ unsigned int ackr_nr_unacked; /* Number of unacked packets */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e363f21a20141..c61efe08695d7 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, unsigned long expiry = rxrpc_soft_ack_delay; unsigned long now = jiffies, ack_at; - call->ackr_serial = serial; - if (rxrpc_soft_ack_delay < expiry) expiry = rxrpc_soft_ack_delay; if (call->peer->srtt_us != 0) @@ -373,7 +371,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { unsigned long now, next, t; - rxrpc_serial_t ackr_serial; bool resend = false, expired = false; s32 abort_code; @@ -423,8 +420,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); - ackr_serial = xchg(&call->ackr_serial, 0); - rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial, + rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, rxrpc_propose_ack_ping_for_lost_ack); } -- GitLab From 6f769f22822aa4124b556339781b04d810f0e038 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:15 +0000 Subject: [PATCH 0683/1405] rxrpc: Fix response to PING RESPONSE ACKs to a dead call Stop rxrpc from sending a DUP ACK in response to a PING RESPONSE ACK on a dead call. We may have initiated the ping but the call may have beaten the response to completion. Fixes: 18bfeba50dfd ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index ec5eae60ab0c4..1f251d758cb9d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &pkt.ack, sizeof(pkt.ack)) < 0) + return; + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) + return; + } + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just -- GitLab From 41b7fa157ea1c8c3a575ca7f5f32034de9bee3ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 2 Feb 2024 15:19:16 +0000 Subject: [PATCH 0684/1405] rxrpc: Fix counting of new acks and nacks Fix the counting of new acks and nacks when parsing a packet - something that is used in congestion control. As the code stands, it merely notes if there are any nacks whereas what we really should do is compare the previous SACK table to the new one, assuming we get two successive ACK packets with nacks in them. However, we really don't want to do that if we can avoid it as the tables might not correspond directly as one may be shifted from the other - something that will only get harder to deal with once extended ACK tables come into full use (with a capacity of up to 8192). Instead, count the number of nacks shifted out of the old SACK, the number of nacks retained in the portion still active and the number of new acks and nacks in the new table then calculate what we need. Note this ends up a bit of an estimate as the Rx protocol allows acks to be withdrawn by the receiver and packets requested to be retransmitted. Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs") Signed-off-by: David Howells cc: Marc Dionne cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 8 ++- net/rxrpc/ar-internal.h | 20 ++++-- net/rxrpc/call_event.c | 6 +- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 115 +++++++++++++++++++++++++++++------ 5 files changed, 122 insertions(+), 28 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 4c1ef7b3705c2..87b8de9b6c1c4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -128,6 +128,7 @@ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ + EM(rxrpc_skb_get_last_nack, "GET last-nack") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ @@ -141,6 +142,7 @@ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ @@ -1552,7 +1554,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1560,9 +1562,9 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), __entry->sum.cwnd, __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.saw_nacks, + __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, __entry->sum.nr_new_acks, - __entry->sum.nr_rot_new_acks, + __entry->sum.nr_new_nacks, __entry->top - __entry->hard_ack, __entry->sum.cumulative_acks, __entry->sum.dup_acks, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index b4ab26c3718a4..7818aae1be8e0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -199,11 +199,19 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ - u16 offset; /* Offset of data */ - u16 len; /* Length of data */ - u8 flags; + union { + struct { + u16 offset; /* Offset of data */ + u16 len; /* Length of data */ + u8 flags; #define RXRPC_RX_VERIFIED 0x01 - + }; + struct { + rxrpc_seq_t first_ack; /* First packet in acks table */ + u8 nr_acks; /* Number of acks+nacks */ + u8 nr_nacks; /* Number of nacks */ + }; + }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ }; @@ -692,6 +700,7 @@ struct rxrpc_call { u8 cong_dup_acks; /* Count of ACKs showing missing packets */ u8 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ + struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ @@ -729,7 +738,8 @@ struct rxrpc_call { struct rxrpc_ack_summary { u16 nr_acks; /* Number of ACKs in packet */ u16 nr_new_acks; /* Number of new ACKs in packet */ - u16 nr_rot_new_acks; /* Number of rotated new ACKs */ + u16 nr_new_nacks; /* Number of new nacks in packet */ + u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ u8 ack_reason; bool saw_nacks; /* Saw NACKs in packet */ bool new_low_nack; /* T if new low NACK found */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c61efe08695d7..0f78544d043be 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -112,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { struct rxrpc_ackpacket *ack = NULL; + struct rxrpc_skb_priv *sp; struct rxrpc_txbuf *txb; unsigned long resend_at; rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); @@ -139,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) * explicitly NAK'd packets. */ if (ack_skb) { + sp = rxrpc_skb(ack_skb); ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - for (i = 0; i < ack->nAcks; i++) { + for (i = 0; i < sp->nr_acks; i++) { rxrpc_seq_t seq; if (ack->acks[i] & 1) continue; - seq = ntohl(ack->firstPacket) + i; + seq = sp->first_ack + i; if (after(txb->seq, transmitted)) break; if (after(txb->seq, seq)) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0943e54370ba0..9fc9a6c3f6858 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -686,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work) del_timer_sync(&call->timer); + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); rxrpc_cleanup_ring(call); while ((txb = list_first_entry_or_null(&call->tx_sendmsg, struct rxrpc_txbuf, call_link))) { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 92495e73b8699..9691de00ade75 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } cumulative_acks += summary->nr_new_acks; - cumulative_acks += summary->nr_rot_new_acks; if (cumulative_acks > 255) cumulative_acks = 255; - summary->mode = call->cong_mode; summary->cwnd = call->cong_cwnd; summary->ssthresh = call->cong_ssthresh; summary->cumulative_acks = cumulative_acks; @@ -151,6 +149,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, cwnd = RXRPC_TX_MAX_WINDOW; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; + summary->mode = call->cong_mode; trace_rxrpc_congest(call, summary, acked_serial, change); if (resend) rxrpc_resend(call, skb); @@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { if (before_eq(txb->seq, call->acks_hard_ack)) continue; - summary->nr_rot_new_acks++; if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; @@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + if (unlikely(call->cong_last_nack)) { + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); + call->cong_last_nack = NULL; + } + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: @@ -702,6 +705,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, wake_up(&call->waitq); } +/* + * Determine how many nacks from the previous ACK have now been satisfied. + */ +static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + rxrpc_seq_t seq) +{ + struct sk_buff *skb = call->cong_last_nack; + struct rxrpc_ackpacket ack; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int i, new_acks = 0, retained_nacks = 0; + rxrpc_seq_t old_seq = sp->first_ack; + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack); + + if (after_eq(seq, old_seq + sp->nr_acks)) { + summary->nr_new_acks += sp->nr_nacks; + summary->nr_new_acks += seq - (old_seq + sp->nr_acks); + summary->nr_retained_nacks = 0; + } else if (seq == old_seq) { + summary->nr_retained_nacks = sp->nr_nacks; + } else { + for (i = 0; i < sp->nr_acks; i++) { + if (acks[i] == RXRPC_ACK_TYPE_NACK) { + if (before(old_seq + i, seq)) + new_acks++; + else + retained_nacks++; + } + } + + summary->nr_new_acks += new_acks; + summary->nr_retained_nacks = retained_nacks; + } + + return old_seq + sp->nr_acks; +} + /* * Process individual soft ACKs. * @@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, * the timer on the basis that the peer might just not have processed them at * the time the ACK was sent. */ -static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, - rxrpc_seq_t seq, int nr_acks, - struct rxrpc_ack_summary *summary) +static void rxrpc_input_soft_acks(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct sk_buff *skb, + rxrpc_seq_t seq, + rxrpc_seq_t since) { - unsigned int i; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int i, old_nacks = 0; + rxrpc_seq_t lowest_nak = seq + sp->nr_acks; + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - for (i = 0; i < nr_acks; i++) { + for (i = 0; i < sp->nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; - summary->nr_new_acks++; + if (after_eq(seq, since)) + summary->nr_new_acks++; } else { - if (!summary->saw_nacks && - call->acks_lowest_nak != seq + i) { - call->acks_lowest_nak = seq + i; - summary->new_low_nack = true; - } summary->saw_nacks = true; + if (before(seq, since)) { + /* Overlap with previous ACK */ + old_nacks++; + } else { + summary->nr_new_nacks++; + sp->nr_nacks++; + } + + if (before(seq, lowest_nak)) + lowest_nak = seq; } + seq++; + } + + if (lowest_nak != call->acks_lowest_nak) { + call->acks_lowest_nak = lowest_nak; + summary->new_low_nack = true; } + + /* We *can* have more nacks than we did - the peer is permitted to drop + * packets it has soft-acked and re-request them. Further, it is + * possible for the nack distribution to change whilst the number of + * nacks stays the same or goes down. + */ + if (old_nacks < summary->nr_retained_nacks) + summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; + summary->nr_retained_nacks = old_nacks; } /* @@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_ackinfo info; rxrpc_serial_t ack_serial, acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; int nr_acks, offset, ioffset; _enter(""); @@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) prev_pkt = ntohl(ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = ack.nAcks; + sp->first_ack = first_soft_ack; + sp->nr_acks = nr_acks; summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? ack.reason : RXRPC_ACK__INVALID); @@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); + if (call->cong_last_nack) { + since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); + call->cong_last_nack = NULL; + } else { + summary.nr_new_acks = first_soft_ack - call->acks_first_seq; + call->acks_lowest_nak = first_soft_ack + nr_acks; + since = first_soft_ack; + } + call->acks_latest_ts = skb->tstamp; call->acks_first_seq = first_soft_ack; call->acks_prev_seq = prev_pkt; @@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_ACK_PING: break; default: - if (after(acked_serial, call->acks_highest_serial)) + if (acked_serial && after(acked_serial, call->acks_highest_serial)) call->acks_highest_serial = acked_serial; break; } @@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, - nr_acks, &summary); + rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); + rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); + call->cong_last_nack = skb; } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && -- GitLab From a19747c3b9bf6476cc36d0a3a5ef0ff92999169e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Feb 2024 17:06:59 +0100 Subject: [PATCH 0685/1405] selftests: net: let big_tcp test cope with slow env In very slow environments, most big TCP cases including segmentation and reassembly of big TCP packets have a good chance to fail: by default the TCP client uses write size well below 64K. If the host is low enough autocorking is unable to build real big TCP packets. Address the issue using much larger write operations. Note that is hard to observe the issue without an extremely slow and/or overloaded environment; reduce the TCP transfer time to allow for much easier/faster reproducibility. Fixes: 6bb382bcf742 ("selftests: add a selftest for big tcp") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Acked-by: Xin Long Signed-off-by: David S. Miller --- tools/testing/selftests/net/big_tcp.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh index cde9a91c47971..2db9d15cd45fe 100755 --- a/tools/testing/selftests/net/big_tcp.sh +++ b/tools/testing/selftests/net/big_tcp.sh @@ -122,7 +122,9 @@ do_netperf() { local netns=$1 [ "$NF" = "6" ] && serip=$SERVER_IP6 - ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null + + # use large write to be sure to generate big tcp packets + ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null } do_test() { -- GitLab From 4703b014f28bf7a2e56d1da238ee95ef6c5ce76b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 5 Feb 2024 15:44:30 +0300 Subject: [PATCH 0686/1405] ASoC: cs35l56: fix reversed if statement in cs35l56_dspwait_asp1tx_put() It looks like the "!" character was added accidentally. The regmap_update_bits_check() function is normally going to succeed. This means the rest of the function is unreachable and we don't handle the situation where "changed" is true correctly. Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Signed-off-by: Dan Carpenter Reviewed-by: Richard Fitzgerald Link: https://lore.kernel.org/r/0c254c07-d1c0-4a5c-a22b-7e135cab032c@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index c23e29da4cfb9..ebed5ab1245b4 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -115,7 +115,7 @@ static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, ret = regmap_update_bits_check(cs35l56->base.regmap, addr, CS35L56_ASP_TXn_SRC_MASK, val, &changed); - if (!ret) + if (ret) return ret; if (changed) -- GitLab From ac670505d825151ce47c1e75b9964485991954dd Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 2 Feb 2024 10:43:13 -0700 Subject: [PATCH 0687/1405] ASoC: dt-bindings: google,sc7280-herobrine: Drop bouncing @codeaurora The servers for the @codeaurora domain have long been retired and any messages sent there bounce. Srinivasa Rao Mandadapu has left the company and there does not appear to be an updated address to suggest, so drop Srinivasa as maintainer of the binding. The binding still appears to be maintined as Judy is listed. Signed-off-by: Jeffrey Hugo Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240202174313.4113670-1-quic_jhugo@quicinc.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/google,sc7280-herobrine.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml index ec4b6e547ca6e..cdcd7c6f21eb2 100644 --- a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml +++ b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Google SC7280-Herobrine ASoC sound card driver maintainers: - - Srinivasa Rao Mandadapu - Judy Hsiao description: -- GitLab From b5fbde22684af5456d1de60758950944d69d69ad Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Fri, 2 Feb 2024 12:49:01 +0100 Subject: [PATCH 0688/1405] ASoC: Intel: avs: Fix pci_probe() error path Recent changes modified operation-order in the probe() function without updating its error path accordingly. If snd_hdac_i915_init() exists with status EPROBE_DEFER the error path must cleanup allocated IRQs before leaving the scope. Fixes: 2dddc514b6e4 ("ASoC: Intel: avs: Move snd_hdac_i915_init to before probe_work.") Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20240202114901.1002127-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 59c3793f65df0..db78eb2f01080 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -477,6 +477,9 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; err_i915_init: + pci_free_irq(pci, 0, adev); + pci_free_irq(pci, 0, bus); + pci_free_irq_vectors(pci); pci_clear_master(pci); pci_set_drvdata(pci, NULL); err_acquire_irq: -- GitLab From 34a1066981a967eab619938e7b35a9be6b4c34e1 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Sun, 4 Feb 2024 21:01:17 +0100 Subject: [PATCH 0689/1405] ASoC: tas2781: add module parameter to tascodec_init() The tascodec_init() of the snd-soc-tas2781-comlib module is called from snd-soc-tas2781-i2c and snd-hda-scodec-tas2781-i2c modules. It calls request_firmware_nowait() with parameter THIS_MODULE and a cont/callback from the latter modules. The latter modules can be removed while their callbacks are running, resulting in a general protection failure. Add module parameter to tascodec_init() so request_firmware_nowait() can be called with the module of the callback. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/118dad922cef50525e5aab09badef2fa0eb796e5.1707076603.git.soyer@irl.hu Signed-off-by: Mark Brown --- include/sound/tas2781.h | 1 + sound/pci/hda/tas2781_hda_i2c.c | 2 +- sound/soc/codecs/tas2781-comlib.c | 3 ++- sound/soc/codecs/tas2781-i2c.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index b00d65417c310..9aff384941de2 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -142,6 +142,7 @@ struct tasdevice_priv { void tas2781_reset(struct tasdevice_priv *tas_dev); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, void (*cont)(const struct firmware *fw, void *context)); struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); int tasdevice_init(struct tasdevice_priv *tas_priv); diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 2dd809de62e5a..1bfb00102a77a 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -710,7 +710,7 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, strscpy(comps->name, dev_name(dev), sizeof(comps->name)); - ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready); + ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready); if (!ret) comps->playback_hook = tas2781_hda_playback_hook; diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c index b7e56ceb1acff..5d0e5348b361a 100644 --- a/sound/soc/codecs/tas2781-comlib.c +++ b/sound/soc/codecs/tas2781-comlib.c @@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv *tas_dev) EXPORT_SYMBOL_GPL(tas2781_reset); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, void (*cont)(const struct firmware *fw, void *context)) { int ret = 0; @@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, tas_priv->dev_name, tas_priv->ndev); crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL); tas_priv->codec = codec; - ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, + ret = request_firmware_nowait(module, FW_ACTION_UEVENT, tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv, cont); if (ret) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 32913bd1a6233..b5abff230e437 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -566,7 +566,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) { struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec); - return tascodec_init(tas_priv, codec, tasdevice_fw_ready); + return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready); } static void tasdevice_deinit(void *context) -- GitLab From c712c05e46c8ce550842951e9e2606e24dbf0475 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Sun, 4 Feb 2024 17:19:12 +0800 Subject: [PATCH 0690/1405] spi: imx: fix the burst length at DMA mode and CPU mode For DMA mode, the bus width of the DMA is equal to the size of data word, so burst length should be configured as bits per word. For CPU mode, because of the spi transfer len is in byte, so calculate the total number of words according to spi transfer len and bits per word, burst length should be configured as total data bits. Signed-off-by: Carlos Song Reviewed-by: Clark Wang Fixes: e9b220aeacf1 ("spi: spi-imx: correctly configure burst length when using dma") Fixes: 5f66db08cbd3 ("spi: imx: Take in account bits per word instead of assuming 8-bits") Link: https://lore.kernel.org/r/20240204091912.36488-1-carlos.song@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 546cdce525fc5..833a1bb7a9143 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -2,6 +2,7 @@ // Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. // Copyright (C) 2008 Juergen Beisert +#include #include #include #include @@ -660,15 +661,15 @@ static int mx51_ecspi_prepare_transfer(struct spi_imx_data *spi_imx, << MX51_ECSPI_CTRL_BL_OFFSET; else { if (spi_imx->usedma) { - ctrl |= (spi_imx->bits_per_word * - spi_imx_bytes_per_word(spi_imx->bits_per_word) - 1) + ctrl |= (spi_imx->bits_per_word - 1) << MX51_ECSPI_CTRL_BL_OFFSET; } else { if (spi_imx->count >= MX51_ECSPI_CTRL_MAX_BURST) - ctrl |= (MX51_ECSPI_CTRL_MAX_BURST - 1) + ctrl |= (MX51_ECSPI_CTRL_MAX_BURST * BITS_PER_BYTE - 1) << MX51_ECSPI_CTRL_BL_OFFSET; else - ctrl |= (spi_imx->count * spi_imx->bits_per_word - 1) + ctrl |= spi_imx->count / DIV_ROUND_UP(spi_imx->bits_per_word, + BITS_PER_BYTE) * spi_imx->bits_per_word << MX51_ECSPI_CTRL_BL_OFFSET; } } -- GitLab From 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Feb 2024 13:22:39 +1100 Subject: [PATCH 0691/1405] nfsd: don't take fi_lock in nfsd_break_deleg_cb() A recent change to check_for_locks() changed it to take ->flc_lock while holding ->fi_lock. This creates a lock inversion (reported by lockdep) because there is a case where ->fi_lock is taken while holding ->flc_lock. ->flc_lock is held across ->fl_lmops callbacks, and nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However it doesn't need to. Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list and so needed the lock. Since then it doesn't walk the list and doesn't need the lock. Two actions are performed under the lock. One is to call nfsd_break_one_deleg which calls nfsd4_run_cb(). These doesn't act on the nfs4_file at all, so don't need the lock. The other is to set ->fi_had_conflict which is in the nfs4_file. This field is only ever set here (except when initialised to false) so there is no possible problem will multiple threads racing when setting it. The field is tested twice in nfs4_set_delegation(). The first test does not hold a lock and is documented as an opportunistic optimisation, so it doesn't impose any need to hold ->fi_lock while setting ->fi_had_conflict. The second test in nfs4_set_delegation() *is* make under ->fi_lock, so removing the locking when ->fi_had_conflict is set could make a change. The change could only be interesting if ->fi_had_conflict tested as false even though nfsd_break_one_deleg() ran before ->fi_lock was unlocked. i.e. while hash_delegation_locked() was running. As hash_delegation_lock() doesn't interact in any way with nfs4_run_cb() there can be no importance to this interaction. So this patch removes the locking from nfsd_break_one_deleg() and moves the final test on ->fi_had_conflict out of the locked region to make it clear that locking isn't important to the test. It is still tested *after* vfs_setlease() has succeeded. This might be significant and as vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called under ->flc_lock this "after" is a true ordering provided by a spinlock. Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6dc6340e28529..7d6c657e0409d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4945,10 +4945,8 @@ nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); - spin_unlock(&fp->fi_lock); return false; } @@ -5557,12 +5555,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); - if (fp->fi_had_conflict) - status = -EAGAIN; - else - status = hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); -- GitLab From 17adc3f329e922bd1dd5aee45f43d9dab351c1e9 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 22 Jan 2024 11:35:14 -0600 Subject: [PATCH 0692/1405] net: marvell,prestera: Fix example PCI bus addressing The example for PCI devices has some addressing errors. 'reg' is written as if the parent bus is PCI, but the default bus for examples is 1 address and size cell. 'ranges' is defining config space with a size of 0. Generally, config space should not be defined in 'ranges', only PCI memory and I/O spaces. Fix these issues by updating the values with made-up, but valid values. This was uncovered with recent dtschema changes. Link: https://lore.kernel.org/r/20240122173514.935742-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/marvell,prestera.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell,prestera.yaml b/Documentation/devicetree/bindings/net/marvell,prestera.yaml index 5ea8b73663a50..16ff892f7bbd0 100644 --- a/Documentation/devicetree/bindings/net/marvell,prestera.yaml +++ b/Documentation/devicetree/bindings/net/marvell,prestera.yaml @@ -78,8 +78,8 @@ examples: pcie@0 { #address-cells = <3>; #size-cells = <2>; - ranges = <0x0 0x0 0x0 0x0 0x0 0x0>; - reg = <0x0 0x0 0x0 0x0 0x0 0x0>; + ranges = <0x02000000 0x0 0x100000 0x10000000 0x0 0x0>; + reg = <0x0 0x1000>; device_type = "pci"; switch@0,0 { -- GitLab From 61712c94782ce105253ee1939cda0c5c025b2c0c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 30 Jan 2024 13:26:43 +1000 Subject: [PATCH 0693/1405] nouveau/gsp: use correct size for registry rpc. Timur pointed this out before, and it just slipped my mind, but this might help some things work better, around pcie power management. Cc: # v6.7 Fixes: 8d55b0a940bb ("nouveau/gsp: add some basic registry entries.") Signed-off-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240130032643.2498315-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 9ee58e2a0eb2a..5e1fa176aac4a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1078,7 +1078,6 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) if (IS_ERR(rpc)) return PTR_ERR(rpc); - rpc->size = sizeof(*rpc); rpc->numEntries = NV_GSP_REG_NUM_ENTRIES; str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]); @@ -1094,6 +1093,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) strings += name_len; str_offset += name_len; } + rpc->size = str_offset; return nvkm_gsp_rpc_wr(gsp, rpc, false); } -- GitLab From 042b5f83841fbf7ce39474412db3b5e4765a7ea7 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 2 Feb 2024 17:06:07 -0600 Subject: [PATCH 0694/1405] drm/nouveau: fix several DMA buffer leaks Nouveau manages GSP-RM DMA buffers with nvkm_gsp_mem objects. Several of these buffers are never dealloced. Some of them can be deallocated right after GSP-RM is initialized, but the rest need to stay until the driver unloads. Also futher bullet-proof these objects by poisoning the buffer and clearing the nvkm_gsp_mem object when it is deallocated. Poisoning the buffer should trigger an error (or crash) from GSP-RM if it tries to access the buffer after we've deallocated it, because we were wrong about when it is safe to deallocate. Finally, change the mem->size field to a size_t because that's the same type that dma_alloc_coherent expects. Cc: # v6.7 Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Timur Tabi Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240202230608.1981026-1-ttabi@nvidia.com --- .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 2 +- .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 59 ++++++++++++------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index d1437c08645f9..6f5d376d8fcc1 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -9,7 +9,7 @@ #define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT) struct nvkm_gsp_mem { - u32 size; + size_t size; void *data; dma_addr_t addr; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 5e1fa176aac4a..6208ddd929645 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -997,6 +997,32 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp) return 0; } +static void +nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) +{ + if (mem->data) { + /* + * Poison the buffer to catch any unexpected access from + * GSP-RM if the buffer was prematurely freed. + */ + memset(mem->data, 0xFF, mem->size); + + dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); + memset(mem, 0, sizeof(*mem)); + } +} + +static int +nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem) +{ + mem->size = size; + mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); + if (WARN_ON(!mem->data)) + return -ENOMEM; + + return 0; +} + static int r535_gsp_postinit(struct nvkm_gsp *gsp) { @@ -1024,6 +1050,13 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) nvkm_inth_allow(&gsp->subdev.inth); nvkm_wr32(device, 0x110004, 0x00000040); + + /* Release the DMA buffers that were needed only for boot and init */ + nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); + nvkm_gsp_mem_dtor(gsp, &gsp->libos); + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); + return ret; } @@ -1532,27 +1565,6 @@ r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) return 0; } -static void -nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) -{ - if (mem->data) { - dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); - mem->data = NULL; - } -} - -static int -nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, u32 size, struct nvkm_gsp_mem *mem) -{ - mem->size = size; - mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); - if (WARN_ON(!mem->data)) - return -ENOMEM; - - return 0; -} - - static int r535_gsp_booter_unload(struct nvkm_gsp *gsp, u32 mbox0, u32 mbox1) { @@ -2150,6 +2162,11 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) mutex_destroy(&gsp->cmdq.mutex); r535_gsp_dtor_fws(gsp); + + nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); + nvkm_gsp_mem_dtor(gsp, &gsp->loginit); + nvkm_gsp_mem_dtor(gsp, &gsp->logintr); + nvkm_gsp_mem_dtor(gsp, &gsp->logrm); } int -- GitLab From 34e659f34a7559ecfd9c1f5b24d4c291f3f54711 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 2 Feb 2024 17:06:08 -0600 Subject: [PATCH 0695/1405] drm/nouveau: nvkm_gsp_radix3_sg() should use nvkm_gsp_mem_ctor() Function nvkm_gsp_radix3_sg() uses nvkm_gsp_mem objects to allocate the radix3 tables, but it unnecessarily creates those objects manually instead of using the standard nvkm_gsp_mem_ctor() function like the rest of the code does. Signed-off-by: Timur Tabi Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240202230608.1981026-2-ttabi@nvidia.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index 6208ddd929645..a41735ab60683 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1950,20 +1950,20 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) * See kgspCreateRadix3_IMPL */ static int -nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, +nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) { u64 addr; for (int i = ARRAY_SIZE(rx3->mem) - 1; i >= 0; i--) { u64 *ptes; - int idx; + size_t bufsize; + int ret, idx; - rx3->mem[i].size = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); - rx3->mem[i].data = dma_alloc_coherent(device->dev, rx3->mem[i].size, - &rx3->mem[i].addr, GFP_KERNEL); - if (WARN_ON(!rx3->mem[i].data)) - return -ENOMEM; + bufsize = ALIGN((size / GSP_PAGE_SIZE) * sizeof(u64), GSP_PAGE_SIZE); + ret = nvkm_gsp_mem_ctor(gsp, bufsize, &rx3->mem[i]); + if (ret) + return ret; ptes = rx3->mem[i].data; if (i == 2) { @@ -2003,7 +2003,7 @@ r535_gsp_fini(struct nvkm_gsp *gsp, bool suspend) if (ret) return ret; - ret = nvkm_gsp_radix3_sg(gsp->subdev.device, &gsp->sr.sgt, len, &gsp->sr.radix3); + ret = nvkm_gsp_radix3_sg(gsp, &gsp->sr.sgt, len, &gsp->sr.radix3); if (ret) return ret; @@ -2211,7 +2211,7 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp) memcpy(gsp->sig.data, data, size); /* Build radix3 page table for ELF image. */ - ret = nvkm_gsp_radix3_sg(device, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3); + ret = nvkm_gsp_radix3_sg(gsp, &gsp->fw.mem.sgt, gsp->fw.len, &gsp->radix3); if (ret) return ret; -- GitLab From 0647903efbc84b772325b4d24d9487e24d6d1e03 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 3 Feb 2024 14:24:46 +0100 Subject: [PATCH 0696/1405] wifi: mt76: mt7996: fix fortify warning Copy cck and ofdm separately in order to avoid __read_overflow2_field warning. Fixes: f75e4779d215 ("wifi: mt76: mt7996: add txpower setting support") Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://msgid.link/20240203132446.54790-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7996/mcu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c index 3c729b563edc5..699be57309c2e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c @@ -4477,7 +4477,8 @@ int mt7996_mcu_set_txpower_sku(struct mt7996_phy *phy) skb_put_data(skb, &req, sizeof(req)); /* cck and ofdm */ - skb_put_data(skb, &la.cck, sizeof(la.cck) + sizeof(la.ofdm)); + skb_put_data(skb, &la.cck, sizeof(la.cck)); + skb_put_data(skb, &la.ofdm, sizeof(la.ofdm)); /* ht20 */ skb_put_data(skb, &la.mcs[0], 8); /* ht40 */ -- GitLab From 3376ca3f1a2075eaa23c5576c47d04d7e8a4adda Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 3 Feb 2024 13:45:20 +0100 Subject: [PATCH 0697/1405] KVM: x86: Fix KVM_GET_MSRS stack info leak Commit 6abe9c1386e5 ("KVM: X86: Move ignore_msrs handling upper the stack") changed the 'ignore_msrs' handling, including sanitizing return values to the caller. This was fine until commit 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") which allowed non-existing feature MSRs to be ignored, i.e. to not generate an error on the ioctl() level. It even tried to preserve the sanitization of the return value. However, the logic is flawed, as '*data' will be overwritten again with the uninitialized stack value of msr.data. Fix this by simplifying the logic and always initializing msr.data, vanishing the need for an additional error exit path. Fixes: 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") Signed-off-by: Mathias Krause Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20240203124522.592778-2-minipli@grsecurity.net Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 25bc52cdf8f45..3f6e82fd37f39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1704,22 +1704,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) struct kvm_msr_entry msr; int r; + /* Unconditionally clear the output for simplicity */ + msr.data = 0; msr.index = index; r = kvm_get_msr_feature(&msr); - if (r == KVM_MSR_RET_INVALID) { - /* Unconditionally clear the output for simplicity */ - *data = 0; - if (kvm_msr_ignored_check(index, 0, false)) - r = 0; - } - - if (r) - return r; + if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false)) + r = 0; *data = msr.data; - return 0; + return r; } static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) -- GitLab From 4e6c9011990726f4d175e2cdfebe5b0b8cce4839 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 3 Feb 2024 10:45:21 +0800 Subject: [PATCH 0698/1405] scsi: core: Move scsi_host_busy() out of host lock if it is for per-command Commit 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler") intended to fix a hard lockup issue triggered by EH. The core idea was to move scsi_host_busy() out of the host lock when processing individual commands for EH. However, a suggested style change inadvertently caused scsi_host_busy() to remain under the host lock. Fix this by calling scsi_host_busy() outside the lock. Fixes: 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler") Cc: Sathya Prakash Veerichetty Cc: Bart Van Assche Cc: Ewan D. Milne Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240203024521.2006455-1-ming.lei@redhat.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 3 ++- drivers/scsi/scsi_lib.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 4f455884fdc44..612489afe8d24 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -282,11 +282,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) { struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); struct Scsi_Host *shost = scmd->device->host; + unsigned int busy = scsi_host_busy(shost); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost, scsi_host_busy(shost)); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1fb80eae9a63a..df5ac03d5d6c2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -278,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { + unsigned int busy = scsi_host_busy(shost); + spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost, scsi_host_busy(shost)); + scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); -- GitLab From d6c1b19153f92e95e5e1801d540e98771053afae Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 20 Dec 2023 17:26:58 +0100 Subject: [PATCH 0699/1405] scsi: lpfc: Use unsigned type for num_sge LUNs going into "failed ready running" state observed on >1T and on even numbers of size (2T, 4T, 6T, 8T and 10T). The issue occurs when DIF is enabled at the host. The kernel logs: Cannot setup S/G List for HBAIO segs 1/1 SGL 512 SCSI 256: 3 0 The host lpfc driver is failing to setup scatter/gather list (protection data) for the I/Os. The return type lpfc_bg_setup_sgl()/lpfc_bg_setup_sgl_prot() causes the compiler to remove the most significant bit. Use an unsigned type instead. Signed-off-by: Hannes Reinecke [dwagner: added commit message] Signed-off-by: Daniel Wagner Link: https://lore.kernel.org/r/20231220162658.12392-1-dwagner@suse.de Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index d26941b131fdb..bf879d81846b6 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1918,7 +1918,7 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1926,8 +1926,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2099,7 +2099,7 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2123,8 +2123,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL; sgpe = scsi_prot_sglist(sc); -- GitLab From b513d30d59bb383a6a5d6b533afcab2cee99a8f8 Mon Sep 17 00:00:00 2001 From: Alice Chao Date: Mon, 5 Feb 2024 18:49:04 +0800 Subject: [PATCH 0700/1405] scsi: ufs: core: Fix shift issue in ufshcd_clear_cmd() When task_tag >= 32 (in MCQ mode) and sizeof(unsigned int) == 4, 1U << task_tag will out of bounds for a u32 mask. Fix this up to prevent SHIFT_ISSUE (bitwise shifts that are out of bounds for their data type). [name:debug_monitors&]Unexpected kernel BRK exception at EL1 [name:traps&]Internal error: BRK handler: 00000000f2005514 [#1] PREEMPT SMP [name:mediatek_cpufreq_hw&]cpufreq stop DVFS log done [name:mrdump&]Kernel Offset: 0x1ba5800000 from 0xffffffc008000000 [name:mrdump&]PHYS_OFFSET: 0x80000000 [name:mrdump&]pstate: 22400005 (nzCv daif +PAN -UAO) [name:mrdump&]pc : [0xffffffdbaf52bb2c] ufshcd_clear_cmd+0x280/0x288 [name:mrdump&]lr : [0xffffffdbaf52a774] ufshcd_wait_for_dev_cmd+0x3e4/0x82c [name:mrdump&]sp : ffffffc0081471b0 Workqueue: ufs_eh_wq_0 ufshcd_err_handler Call trace: dump_backtrace+0xf8/0x144 show_stack+0x18/0x24 dump_stack_lvl+0x78/0x9c dump_stack+0x18/0x44 mrdump_common_die+0x254/0x480 [mrdump] ipanic_die+0x20/0x30 [mrdump] notify_die+0x15c/0x204 die+0x10c/0x5f8 arm64_notify_die+0x74/0x13c do_debug_exception+0x164/0x26c el1_dbg+0x64/0x80 el1h_64_sync_handler+0x3c/0x90 el1h_64_sync+0x68/0x6c ufshcd_clear_cmd+0x280/0x288 ufshcd_wait_for_dev_cmd+0x3e4/0x82c ufshcd_exec_dev_cmd+0x5bc/0x9ac ufshcd_verify_dev_init+0x84/0x1c8 ufshcd_probe_hba+0x724/0x1ce0 ufshcd_host_reset_and_restore+0x260/0x574 ufshcd_reset_and_restore+0x138/0xbd0 ufshcd_err_handler+0x1218/0x2f28 process_one_work+0x5fc/0x1140 worker_thread+0x7d8/0xe20 kthread+0x25c/0x468 ret_from_fork+0x10/0x20 Signed-off-by: Alice Chao Link: https://lore.kernel.org/r/20240205104905.24929-1-alice.chao@mediatek.com Reviewed-by: Stanley Jhu Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 029d017fc1b66..c6cff4aa440a3 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3057,7 +3057,7 @@ bool ufshcd_cmd_inflight(struct scsi_cmnd *cmd) */ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag) { - u32 mask = 1U << task_tag; + u32 mask; unsigned long flags; int err; @@ -3075,6 +3075,8 @@ static int ufshcd_clear_cmd(struct ufs_hba *hba, u32 task_tag) return 0; } + mask = 1U << task_tag; + /* clear outstanding transaction before retry */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_utrl_clear(hba, mask); -- GitLab From 17e94b2585417e04dabc2f13bc03b4665ae687f3 Mon Sep 17 00:00:00 2001 From: SEO HOYOUNG Date: Mon, 22 Jan 2024 17:33:24 +0900 Subject: [PATCH 0701/1405] scsi: ufs: core: Remove the ufshcd_release() in ufshcd_err_handling_prepare() If ufshcd_err_handler() is called in a suspend/resume situation, ufs_release() can be called twice and active_reqs end up going negative. This is because ufshcd_err_handling_prepare() and ufshcd_err_handling_unprepare() both call ufshcd_release(). Remove superfluous call to ufshcd_release(). Signed-off-by: SEO HOYOUNG Link: https://lore.kernel.org/r/20240122083324.11797-1-hy50.seo@samsung.com Reviewed-by: Bart Van Assche Reviewed-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index c6cff4aa440a3..d77b25b79ae3e 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6354,7 +6354,6 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_hold(hba); if (!ufshcd_is_clkgating_allowed(hba)) ufshcd_setup_clocks(hba, true); - ufshcd_release(hba); pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM; ufshcd_vops_resume(hba, pm_op); } else { -- GitLab From aac8a59537dfc704ff344f1aacfd143c089ee20f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Feb 2024 15:43:41 -1000 Subject: [PATCH 0702/1405] Revert "workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask()" This reverts commit ca10d851b9ad0338c19e8e3089e24d565ebfffd7. The commit allowed workqueue_apply_unbound_cpumask() to clear __WQ_ORDERED on now removed implicitly ordered workqueues. This was incorrect in that system-wide config change shouldn't break ordering properties of all workqueues. The reason why apply_workqueue_attrs() path was allowed to do so was because it was targeting the specific workqueue - either the workqueue had WQ_SYSFS set or the workqueue user specifically tried to change max_active, both of which indicate that the workqueue doesn't need to be ordered. The implicitly ordered workqueue promotion was removed by the previous commit 3bc1e711c26b ("workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered"). However, it didn't update this path and broke build. Let's revert the commit which was incorrect in the first place which also fixes build. Signed-off-by: Tejun Heo Fixes: 3bc1e711c26b ("workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered") Fixes: ca10d851b9ad ("workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask()") Cc: stable@vger.kernel.org # v6.6+ Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 76e60faed8923..7b482a26d7419 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5786,13 +5786,9 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND)) continue; - /* creating multiple pwqs breaks ordering guarantee */ - if (!list_empty(&wq->pwqs)) { - if (wq->flags & __WQ_ORDERED_EXPLICIT) - continue; - wq->flags &= ~__WQ_ORDERED; - } + if (wq->flags & __WQ_ORDERED) + continue; ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { -- GitLab From d0399da9fb5f8e3d897b9776bffee2d3bfe20210 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 29 Jan 2024 19:04:13 -0800 Subject: [PATCH 0703/1405] drm/sched: Re-queue run job worker when drm_sched_entity_pop_job() returns NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather then loop over entities until one with a ready job is found, re-queue the run job worker when drm_sched_entity_pop_job() returns NULL. Signed-off-by: Matthew Brost Reviewed-by: Christian König Fixes: 66dbd9004a55 ("drm/sched: Drain all entities in DRM sched run job worker") Reviewed-by: Luben Tuikov Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240130030413.2031009-1-matthew.brost@intel.com --- drivers/gpu/drm/scheduler/sched_main.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 85f082396d42d..d442b893275b9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1178,21 +1178,24 @@ static void drm_sched_run_job_work(struct work_struct *w) struct drm_sched_entity *entity; struct dma_fence *fence; struct drm_sched_fence *s_fence; - struct drm_sched_job *sched_job = NULL; + struct drm_sched_job *sched_job; int r; if (READ_ONCE(sched->pause_submit)) return; /* Find entity with a ready job */ - while (!sched_job && (entity = drm_sched_select_entity(sched))) { - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) - complete_all(&entity->entity_idle); - } + entity = drm_sched_select_entity(sched); if (!entity) return; /* No more work */ + sched_job = drm_sched_entity_pop_job(entity); + if (!sched_job) { + complete_all(&entity->entity_idle); + drm_sched_run_job_queue(sched); + return; + } + s_fence = sched_job->s_fence; atomic_add(sched_job->credits, &sched->credit_count); -- GitLab From b3ff2d9c3a9c64cd0a011cdd407ffc38a6ea8788 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0704/1405] LoongArch: Select ARCH_ENABLE_THP_MIGRATION instead of redefining it ARCH_ENABLE_THP_MIGRATION is supposed to be selected by arch Kconfig. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 10959e6c35832..64e9a01c7f36e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -12,6 +12,7 @@ config LOONGARCH select ARCH_DISABLE_KASAN_INLINE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE @@ -667,10 +668,6 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. -config ARCH_ENABLE_THP_MIGRATION - def_bool y - depends on TRANSPARENT_HUGEPAGE - config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG -- GitLab From 6b79ecd084c99b31c8b4d0beda08893716d5558e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0705/1405] LoongArch: Select HAVE_ARCH_SECCOMP to use the common SECCOMP menu LoongArch missed the refactoring made by commit 282a181b1a0d ("seccomp: Move config option SECCOMP to arch/Kconfig") because LoongArch was not mainlined at that time. The 'depends on PROC_FS' statement is stale as described in that commit. Select HAVE_ARCH_SECCOMP, and remove the duplicated config entry. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 64e9a01c7f36e..929f68926b343 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -100,6 +100,7 @@ config LOONGARCH select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB if PERF_EVENTS select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -633,23 +634,6 @@ config RANDOMIZE_BASE_MAX_OFFSET This is limited by the size of the lower address memory, 256MB. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu config ARCH_SELECT_MEMORY_MODEL -- GitLab From 4551b30525cf3d2f026b92401ffe241eb04dfebe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0706/1405] LoongArch: Change acpi_core_pic[NR_CPUS] to acpi_core_pic[MAX_CORE_PIC] With default config, the value of NR_CPUS is 64. When HW platform has more then 64 cpus, system will crash on these platforms. MAX_CORE_PIC is the maximum cpu number in MADT table (max physical number) which can exceed the supported maximum cpu number (NR_CPUS, max logical number), but kernel should not crash. Kernel should boot cpus with NR_CPUS, let the remainder cpus stay in BIOS. The potential crash reason is that the array acpi_core_pic[NR_CPUS] can be overflowed when parsing MADT table, and it is obvious that CORE_PIC should be corresponding to physical core rather than logical core, so it is better to define the array as acpi_core_pic[MAX_CORE_PIC]. With the patch, system can boot up 64 vcpus with qemu parameter -smp 128, otherwise system will crash with the following message. [ 0.000000] CPU 0 Unable to handle kernel paging request at virtual address 0000420000004259, era == 90000000037a5f0c, ra == 90000000037a46ec [ 0.000000] Oops[#1]: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.8.0-rc2+ #192 [ 0.000000] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 0.000000] pc 90000000037a5f0c ra 90000000037a46ec tp 9000000003c90000 sp 9000000003c93d60 [ 0.000000] a0 0000000000000019 a1 9000000003d93bc0 a2 0000000000000000 a3 9000000003c93bd8 [ 0.000000] a4 9000000003c93a74 a5 9000000083c93a67 a6 9000000003c938f0 a7 0000000000000005 [ 0.000000] t0 0000420000004201 t1 0000000000000000 t2 0000000000000001 t3 0000000000000001 [ 0.000000] t4 0000000000000003 t5 0000000000000000 t6 0000000000000030 t7 0000000000000063 [ 0.000000] t8 0000000000000014 u0 ffffffffffffffff s9 0000000000000000 s0 9000000003caee98 [ 0.000000] s1 90000000041b0480 s2 9000000003c93da0 s3 9000000003c93d98 s4 9000000003c93d90 [ 0.000000] s5 9000000003caa000 s6 000000000a7fd000 s7 000000000f556b60 s8 000000000e0a4330 [ 0.000000] ra: 90000000037a46ec platform_init+0x214/0x250 [ 0.000000] ERA: 90000000037a5f0c efi_runtime_init+0x30/0x94 [ 0.000000] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 0.000000] PRMD: 00000000 (PPLV0 -PIE -PWE) [ 0.000000] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) [ 0.000000] ECFG: 00070800 (LIE=11 VS=7) [ 0.000000] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 0.000000] BADV: 0000420000004259 [ 0.000000] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 0.000000] Modules linked in: [ 0.000000] Process swapper (pid: 0, threadinfo=(____ptrval____), task=(____ptrval____)) [ 0.000000] Stack : 9000000003c93a14 9000000003800898 90000000041844f8 90000000037a46ec [ 0.000000] 000000000a7fd000 0000000008290000 0000000000000000 0000000000000000 [ 0.000000] 0000000000000000 0000000000000000 00000000019d8000 000000000f556b60 [ 0.000000] 000000000a7fd000 000000000f556b08 9000000003ca7700 9000000003800000 [ 0.000000] 9000000003c93e50 9000000003800898 9000000003800108 90000000037a484c [ 0.000000] 000000000e0a4330 000000000f556b60 000000000a7fd000 000000000f556b08 [ 0.000000] 9000000003ca7700 9000000004184000 0000000000200000 000000000e02b018 [ 0.000000] 000000000a7fd000 90000000037a0790 9000000003800108 0000000000000000 [ 0.000000] 0000000000000000 000000000e0a4330 000000000f556b60 000000000a7fd000 [ 0.000000] 000000000f556b08 000000000eaae298 000000000eaa5040 0000000000200000 [ 0.000000] ... [ 0.000000] Call Trace: [ 0.000000] [<90000000037a5f0c>] efi_runtime_init+0x30/0x94 [ 0.000000] [<90000000037a46ec>] platform_init+0x214/0x250 [ 0.000000] [<90000000037a484c>] setup_arch+0x124/0x45c [ 0.000000] [<90000000037a0790>] start_kernel+0x90/0x670 [ 0.000000] [<900000000378b0d8>] kernel_entry+0xd8/0xdc Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/acpi.h | 4 +++- arch/loongarch/kernel/acpi.c | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 8de6c4b83a61a..49e29b29996f0 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -32,8 +32,10 @@ static inline bool acpi_has_cpu_in_madt(void) return true; } +#define MAX_CORE_PIC 256 + extern struct list_head acpi_wakeup_device_list; -extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; extern int __init parse_acpi_topology(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index b6b097bbf8668..5cf59c617126b 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -29,11 +29,9 @@ int disabled_cpus; u64 acpi_saved_sp; -#define MAX_CORE_PIC 256 - #define PREFIX "ACPI: " -struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size) { -- GitLab From 639420e9f6cd9ca074732b17ac450d2518d5937f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0707/1405] LoongArch: Fix earlycon parameter if KASAN enabled The earlycon parameter is based on fixmap, and fixmap addresses are not supposed to be shadowed by KASAN. So return the kasan_early_shadow_page in kasan_mem_to_shadow() if the input address is above FIXADDR_START. Otherwise earlycon cannot work after kasan_init(). Cc: stable@vger.kernel.org Fixes: 5aa4ac64e6add3e ("LoongArch: Add KASAN (Kernel Address Sanitizer) support") Signed-off-by: Huacai Chen --- arch/loongarch/mm/kasan_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index cc3e81fe0186f..c608adc998458 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -44,6 +44,9 @@ void *kasan_mem_to_shadow(const void *addr) unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; unsigned long offset = 0; + if (maddr >= FIXADDR_START) + return (void *)(kasan_early_shadow_page); + maddr &= XRANGE_SHADOW_MASK; switch (xrange) { case XKPRANGE_CC_SEG: -- GitLab From cca5efe77a6a2d02b3da4960f799fa233e460ab1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2024 12:32:05 +0800 Subject: [PATCH 0708/1405] LoongArch: vDSO: Disable UBSAN instrumentation The vDSO executes in userspace, so the kernel's UBSAN should not instrument it. Solves these kind of build errors: loongarch64-linux-ld: arch/loongarch/vdso/vgettimeofday.o: in function `vdso_shift_ns': lib/vdso/gettimeofday.c:23:(.text+0x3f8): undefined reference to `__ubsan_handle_shift_out_of_bounds' Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401310530.lZHCj1Zl-lkp@intel.com/ Cc: Huacai Chen Cc: WANG Xuerui Cc: Vincenzo Frascino Cc: Nathan Chancellor Cc: Masahiro Yamada Cc: Fangrui Song Cc: loongarch@lists.linux.dev Signed-off-by: Kees Cook Signed-off-by: Huacai Chen --- arch/loongarch/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index c74c9921304f2..f597cd08a96be 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -2,6 +2,7 @@ # Objects to go into the VDSO. KASAN_SANITIZE := n +UBSAN_SANITIZE := n KCOV_INSTRUMENT := n # Include the generic Makefile to check the built vdso. -- GitLab From 7bca405c986075c99b9f729d3587b5c45db39d01 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 19 Jan 2024 19:50:26 +0100 Subject: [PATCH 0709/1405] bus: imx-weim: fix valid range check When the range parsing was open-coded the number of u32 entries to parse had to be a multiple of 4 and the driver checks this. With the range parsing converted to the range parser the counting changes from individual u32 entries to a complete range, so the check must not reject counts not divisible by 4. Fixes: 2a88e4792c6d ("bus: imx-weim: Remove open coded "ranges" parsing") Signed-off-by: Lucas Stach Signed-off-by: Shawn Guo --- drivers/bus/imx-weim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 6b5da73c85417..837bf9d51c6ec 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -120,7 +120,7 @@ static int imx_weim_gpr_setup(struct platform_device *pdev) i++; } - if (i == 0 || i % 4) + if (i == 0) goto err; for (i = 0; i < ARRAY_SIZE(gprvals); i++) { -- GitLab From 3871aa01e1a779d866fa9dfdd5a836f342f4eb87 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 1 Feb 2024 00:23:09 +0900 Subject: [PATCH 0710/1405] tipc: Check the bearer type before calling tipc_udp_nl_bearer_add() syzbot reported the following general protection fault [1]: general protection fault, probably for non-canonical address 0xdffffc0000000010: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000080-0x0000000000000087] ... RIP: 0010:tipc_udp_is_known_peer+0x9c/0x250 net/tipc/udp_media.c:291 ... Call Trace: tipc_udp_nl_bearer_add+0x212/0x2f0 net/tipc/udp_media.c:646 tipc_nl_bearer_add+0x21e/0x360 net/tipc/bearer.c:1089 genl_family_rcv_msg_doit+0x1fc/0x2e0 net/netlink/genetlink.c:972 genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] genl_rcv_msg+0x561/0x800 net/netlink/genetlink.c:1067 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2544 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1367 netlink_sendmsg+0x8b7/0xd70 net/netlink/af_netlink.c:1909 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b The cause of this issue is that when tipc_nl_bearer_add() is called with the TIPC_NLA_BEARER_UDP_OPTS attribute, tipc_udp_nl_bearer_add() is called even if the bearer is not UDP. tipc_udp_is_known_peer() called by tipc_udp_nl_bearer_add() assumes that the media_ptr field of the tipc_bearer has an udp_bearer type object, so the function goes crazy for non-UDP bearers. This patch fixes the issue by checking the bearer type before calling tipc_udp_nl_bearer_add() in tipc_nl_bearer_add(). Fixes: ef20cd4dd163 ("tipc: introduce UDP replicast") Reported-and-tested-by: syzbot+5142b87a9abc510e14fa@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5142b87a9abc510e14fa [1] Signed-off-by: Shigeru Yoshida Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/20240131152310.4089541-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- net/tipc/bearer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2cde375477e38..878415c435276 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + return -EINVAL; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { -- GitLab From e459647710070684a48384d67742822379de8c1c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 2 Feb 2024 16:53:21 -0700 Subject: [PATCH 0711/1405] x86/coco: Define cc_vendor without CONFIG_ARCH_HAS_CC_PLATFORM After commit a9ef277488cf ("x86/kvm: Fix SEV check in sev_map_percpu_data()"), there is a build error when building x86_64_defconfig with GCOV using LLVM: ld.lld: error: undefined symbol: cc_vendor >>> referenced by kvm.c >>> arch/x86/kernel/kvm.o:(kvm_smp_prepare_boot_cpu) in archive vmlinux.a which corresponds to if (cc_vendor != CC_VENDOR_AMD || !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; Without GCOV, clang is able to eliminate the use of cc_vendor because cc_platform_has() evaluates to false when CONFIG_ARCH_HAS_CC_PLATFORM is not set, meaning that if statement will be true no matter what value cc_vendor has. With GCOV, the instrumentation keeps the use of cc_vendor around for code coverage purposes but cc_vendor is only declared, not defined, without CONFIG_ARCH_HAS_CC_PLATFORM, leading to the build error above. Provide a macro definition of cc_vendor when CONFIG_ARCH_HAS_CC_PLATFORM is not set with a value of CC_VENDOR_NONE, so that the first condition can always be evaluated/eliminated at compile time, avoiding the build error altogether. This is very similar to the situation prior to commit da86eb961184 ("x86/coco: Get rid of accessor functions"). Signed-off-by: Nathan Chancellor Acked-by: Borislav Petkov (AMD) Message-Id: <20240202-provide-cc_vendor-without-arch_has_cc_platform-v1-1-09ad5f2a3099@kernel.org> Fixes: a9ef277488cf ("x86/kvm: Fix SEV check in sev_map_percpu_data()", 2024-01-31) Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/coco.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 6ae2d16a7613b..76c310b19b11d 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -10,13 +10,14 @@ enum cc_vendor { CC_VENDOR_INTEL, }; -extern enum cc_vendor cc_vendor; - #ifdef CONFIG_ARCH_HAS_CC_PLATFORM +extern enum cc_vendor cc_vendor; void cc_set_mask(u64 mask); u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); #else +#define cc_vendor (CC_VENDOR_NONE) + static inline u64 cc_mkenc(u64 val) { return val; -- GitLab From b083d24fcf57580cc0b45dd7b0b4f84d8c4624f4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 2 Feb 2024 02:24:59 +0000 Subject: [PATCH 0712/1405] selftests/net: Amend per-netns counter checks Selftests here check not only that connect()/accept() for TCP-AO/TCP-MD5/non-signed-TCP combinations do/don't establish connections, but also counters: those are per-AO-key, per-socket and per-netns. The counters are checked on the server's side, as the server listener has TCP-AO/TCP-MD5/no keys for different peers. All tests run in the same namespaces with the same veth pair, created in test_init(). After close() in both client and server, the sides go through the regular FIN/ACK + FIN/ACK sequence, which goes in the background. If the selftest has already started a new testing scenario, read per-netns counters - it may fail in the end iff it doesn't expect the TCPAOGood per-netns counters go up during the test. Let's just kill both TCP-AO sides - that will avoid any asynchronous background TCP-AO segments going to either sides. Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/all/20240201132153.4d68f45e@kernel.org/T/#u Fixes: 6f0c472a6815 ("selftests/net: Add TCP-AO + TCP-MD5 + no sign listen socket tests") Signed-off-by: Dmitry Safonov Link: https://lore.kernel.org/r/20240202-unsigned-md5-netns-counters-v1-1-8b90c37c0566@arista.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index c5b568cd7d901..6b59a652159f7 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -110,9 +110,9 @@ static void try_accept(const char *tst_name, unsigned int port, test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); out: - synchronize_threads(); /* close() */ + synchronize_threads(); /* test_kill_sk() */ if (sk > 0) - close(sk); + test_kill_sk(sk); } static void server_add_routes(void) @@ -302,10 +302,10 @@ static void try_connect(const char *tst_name, unsigned int port, test_ok("%s: connected", tst_name); out: - synchronize_threads(); /* close() */ + synchronize_threads(); /* test_kill_sk() */ /* _test_connect_socket() cleans up on failure */ if (ret > 0) - close(sk); + test_kill_sk(sk); } #define PREINSTALL_MD5_FIRST BIT(0) @@ -486,10 +486,10 @@ static void try_to_add(const char *tst_name, unsigned int port, } out: - synchronize_threads(); /* close() */ + synchronize_threads(); /* test_kill_sk() */ /* _test_connect_socket() cleans up on failure */ if (ret > 0) - close(sk); + test_kill_sk(sk); } static void client_add_ip(union tcp_addr *client, const char *ip) -- GitLab From 610010737f74482a61896596a0116876ecf9e65c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 5 Feb 2024 15:48:53 -0600 Subject: [PATCH 0713/1405] ASoC: amd: yc: Add DMI quirk for Lenovo Ideapad Pro 5 16ARP8 The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Stanislav Petrov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216925 Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240205214853.2689-1-mario.limonciello@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 23d44a50d8157..80ad60d485ea0 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From dad6a09f3148257ac1773cd90934d721d68ab595 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 29 Jan 2024 15:56:36 -0800 Subject: [PATCH 0714/1405] hrtimer: Report offline hrtimer enqueue The hrtimers migration on CPU-down hotplug process has been moved earlier, before the CPU actually goes to die. This leaves a small window of opportunity to queue an hrtimer in a blind spot, leaving it ignored. For example a practical case has been reported with RCU waking up a SCHED_FIFO task right before the CPUHP_AP_IDLE_DEAD stage, queuing that way a sched/rt timer to the local offline CPU. Make sure such situations never go unnoticed and warn when that happens. Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Reported-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240129235646.3171983-4-boqun.feng@gmail.com --- include/linux/hrtimer.h | 4 +++- kernel/time/hrtimer.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 87e3bedf8eb00..641c4567cfa7a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -157,6 +157,7 @@ enum hrtimer_base_type { * @max_hang_time: Maximum time spent in hrtimer_interrupt * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are * expired + * @online: CPU is online from an hrtimers point of view * @timer_waiters: A hrtimer_cancel() invocation waits for the timer * callback to finish. * @expires_next: absolute time of the next event, is required for remote @@ -179,7 +180,8 @@ struct hrtimer_cpu_base { unsigned int hres_active : 1, in_hrtirq : 1, hang_detected : 1, - softirq_activated : 1; + softirq_activated : 1, + online : 1; #ifdef CONFIG_HIGH_RES_TIMERS unsigned int nr_events; unsigned short nr_retries; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 760793998cdd7..edb0f821dceaa 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1085,6 +1085,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, enum hrtimer_mode mode) { debug_activate(timer, mode); + WARN_ON_ONCE(!base->cpu_base->online); base->cpu_base->active_bases |= 1 << base->index; @@ -2183,6 +2184,7 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->softirq_next_timer = NULL; cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->online = 1; hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2250,6 +2252,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); raw_spin_unlock(&new_base->lock); + old_base->online = 0; raw_spin_unlock(&old_base->lock); return 0; -- GitLab From 5c84bc8b617bf90e722cc57d447abd9a468d3a52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jan 2024 13:51:41 +0100 Subject: [PATCH 0715/1405] powerpc: udbg_memcons: mark functions static ppc64_book3e_allmodconfig has one more driver that triggeres a few missing-prototypes warnings: arch/powerpc/sysdev/udbg_memcons.c:44:6: error: no previous prototype for 'memcons_putc' [-Werror=missing-prototypes] arch/powerpc/sysdev/udbg_memcons.c:57:5: error: no previous prototype for 'memcons_getc_poll' [-Werror=missing-prototypes] arch/powerpc/sysdev/udbg_memcons.c:80:5: error: no previous prototype for 'memcons_getc' [-Werror=missing-prototypes] Mark all these function static as there are no other users. Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240123125148.2004648-1-arnd@kernel.org --- arch/powerpc/sysdev/udbg_memcons.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index 5020044400dcb..4de57ba522365 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -41,7 +41,7 @@ struct memcons memcons = { .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE], }; -void memcons_putc(char c) +static void memcons_putc(char c) { char *new_output_pos; @@ -54,7 +54,7 @@ void memcons_putc(char c) memcons.output_pos = new_output_pos; } -int memcons_getc_poll(void) +static int memcons_getc_poll(void) { char c; char *new_input_pos; @@ -77,7 +77,7 @@ int memcons_getc_poll(void) return -1; } -int memcons_getc(void) +static int memcons_getc(void) { int c; -- GitLab From 1c57b9f63ab34f01b8c73731cc0efacb5a9a2f16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jan 2024 13:51:42 +0100 Subject: [PATCH 0716/1405] powerpc: 85xx: mark local functions static These functions are either used in only one file and can just be made static or need an #include statement to avoid a warning: arch/powerpc/platforms/85xx/mpc8536_ds.c:30:13: error: no previous prototype for 'mpc8536_ds_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1010rdb.c:27:13: error: no previous prototype for 'p1010_rdb_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:373:6: error: no previous prototype for 'p1022ds_set_pixel_clock' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:422:1: error: no previous prototype for 'p1022ds_valid_monitor_port' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:435:13: error: no previous prototype for 'p1022_ds_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:43:6: error: no previous prototype for 'p1022rdk_set_pixel_clock' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:92:1: error: no previous prototype for 'p1022rdk_valid_monitor_port' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:99:13: error: no previous prototype for 'p1022_rdk_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/socrates_fpga_pic.c:273:13: error: no previous prototype for 'socrates_fpga_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/xes_mpc85xx.c:40:13: error: no previous prototype for 'xes_mpc85xx_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/mvme2500.c:24:13: error: no previous prototype for 'mvme2500_pic_init' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20240123125148.2004648-2-arnd@kernel.org --- arch/powerpc/platforms/85xx/mpc8536_ds.c | 2 +- arch/powerpc/platforms/85xx/mvme2500.c | 2 +- arch/powerpc/platforms/85xx/p1010rdb.c | 2 +- arch/powerpc/platforms/85xx/p1022_ds.c | 6 +++--- arch/powerpc/platforms/85xx/p1022_rdk.c | 6 +++--- arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 2 ++ arch/powerpc/platforms/85xx/xes_mpc85xx.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index e966b2ad8ecd4..b3327a358eb43 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -27,7 +27,7 @@ #include "mpc85xx.h" -void __init mpc8536_ds_pic_init(void) +static void __init mpc8536_ds_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c index 1b59e45a0c64f..19122daadb55a 100644 --- a/arch/powerpc/platforms/85xx/mvme2500.c +++ b/arch/powerpc/platforms/85xx/mvme2500.c @@ -21,7 +21,7 @@ #include "mpc85xx.h" -void __init mvme2500_pic_init(void) +static void __init mvme2500_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 10d6f1fa33275..491895ac8bcfe 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -24,7 +24,7 @@ #include "mpc85xx.h" -void __init p1010_rdb_pic_init(void) +static void __init p1010_rdb_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 0dd786a061a6a..adc3a2ee14150 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -370,7 +370,7 @@ static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port) * * @pixclock: the wavelength, in picoseconds, of the clock */ -void p1022ds_set_pixel_clock(unsigned int pixclock) +static void p1022ds_set_pixel_clock(unsigned int pixclock) { struct device_node *guts_np = NULL; struct ccsr_guts __iomem *guts; @@ -418,7 +418,7 @@ void p1022ds_set_pixel_clock(unsigned int pixclock) /** * p1022ds_valid_monitor_port: set the monitor port for sysfs */ -enum fsl_diu_monitor_port +static enum fsl_diu_monitor_port p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) { switch (port) { @@ -432,7 +432,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) #endif -void __init p1022_ds_pic_init(void) +static void __init p1022_ds_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 25ab6e9c14703..6198299d95b1b 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -40,7 +40,7 @@ * * @pixclock: the wavelength, in picoseconds, of the clock */ -void p1022rdk_set_pixel_clock(unsigned int pixclock) +static void p1022rdk_set_pixel_clock(unsigned int pixclock) { struct device_node *guts_np = NULL; struct ccsr_guts __iomem *guts; @@ -88,7 +88,7 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock) /** * p1022rdk_valid_monitor_port: set the monitor port for sysfs */ -enum fsl_diu_monitor_port +static enum fsl_diu_monitor_port p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) { return FSL_DIU_PORT_DVI; @@ -96,7 +96,7 @@ p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) #endif -void __init p1022_rdk_pic_init(void) +static void __init p1022_rdk_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index baa12eff6d5de..60e0b8947ce61 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -8,6 +8,8 @@ #include #include +#include "socrates_fpga_pic.h" + /* * The FPGA supports 9 interrupt sources, which can be routed to 3 * interrupt request lines of the MPIC. The line to be used can be diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 45f257fc1ade0..2582427d8d018 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -37,7 +37,7 @@ #define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */ #define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */ -void __init xes_mpc85xx_pic_init(void) +static void __init xes_mpc85xx_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); -- GitLab From 58aeb5623c2ebdadefe6352b14f8076a7073fea0 Mon Sep 17 00:00:00 2001 From: Fred Ai Date: Sat, 3 Feb 2024 02:29:08 -0800 Subject: [PATCH 0717/1405] mmc: sdhci-pci-o2micro: Fix a warm reboot issue that disk can't be detected by BIOS Driver shall switch clock source from DLL clock to OPE clock when power off card to ensure that card can be identified with OPE clock by BIOS. Signed-off-by: Fred Ai Fixes:4be33cf18703 ("mmc: sdhci-pci-o2micro: Improve card input timing at SDR104/HS200 mode") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240203102908.4683-1-fredaibayhubtech@126.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-o2micro.c | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 7bfee28116af1..d4a02184784a3 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -693,6 +693,35 @@ static int sdhci_pci_o2_init_sd_express(struct mmc_host *mmc, struct mmc_ios *io return 0; } +static void sdhci_pci_o2_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) +{ + struct sdhci_pci_chip *chip; + struct sdhci_pci_slot *slot = sdhci_priv(host); + u32 scratch_32 = 0; + u8 scratch_8 = 0; + + chip = slot->chip; + + if (mode == MMC_POWER_OFF) { + /* UnLock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 &= 0x7f; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + + /* Set PCR 0x354[16] to switch Clock Source back to OPE Clock */ + pci_read_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, &scratch_32); + scratch_32 &= ~(O2_SD_SEL_DLL); + pci_write_config_dword(chip->pdev, O2_SD_OUTPUT_CLK_SOURCE_SWITCH, scratch_32); + + /* Lock WP */ + pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch_8); + scratch_8 |= 0x80; + pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch_8); + } + + sdhci_set_power(host, mode, vdd); +} + static int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) { struct sdhci_pci_chip *chip; @@ -1051,6 +1080,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_pci_o2_set_power, }; const struct sdhci_pci_fixes sdhci_o2 = { -- GitLab From cc9432c4fb159a3913e0ce3173b8218cd5bad2e0 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 6 Feb 2024 09:39:12 +0100 Subject: [PATCH 0718/1405] mmc: slot-gpio: Allow non-sleeping GPIO ro This change uses the appropriate _cansleep or non-sleeping API for reading GPIO read-only state. This allows users with GPIOs that never sleepbeing called in atomic context. Implement the same mechanism as in commit 52af318c93e97 ("mmc: Allow non-sleeping GPIO cd"). Signed-off-by: Alexander Stein Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240206083912.2543142-1-alexander.stein@ew.tq-group.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 2a2d949a9344e..39f45c2b6de8a 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -75,11 +75,15 @@ EXPORT_SYMBOL(mmc_gpio_set_cd_irq); int mmc_gpio_get_ro(struct mmc_host *host) { struct mmc_gpio *ctx = host->slot.handler_priv; + int cansleep; if (!ctx || !ctx->ro_gpio) return -ENOSYS; - return gpiod_get_value_cansleep(ctx->ro_gpio); + cansleep = gpiod_cansleep(ctx->ro_gpio); + return cansleep ? + gpiod_get_value_cansleep(ctx->ro_gpio) : + gpiod_get_value(ctx->ro_gpio); } EXPORT_SYMBOL(mmc_gpio_get_ro); -- GitLab From c9da9a1f17bf4fa96b115950fd389c917b583c1c Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Fri, 26 Jan 2024 13:27:58 +0100 Subject: [PATCH 0719/1405] accel/ivpu: Force snooping for MMU writes Set AW_SNOOP_OVERRIDE bit in VPU_37/40XX_HOST_IF_TCU_PTW_OVERRIDES to force snooping for MMU write accesses (setting event queue events). MMU event queue buffer is the only buffer written by MMU and mapped as write-back which break cache coherency. Force write transactions to be snooped solving the problem. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-2-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_37xx.c | 2 +- drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +- drivers/accel/ivpu/ivpu_mmu.c | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index f15a93d830578..77accd029c4a7 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -525,7 +525,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 704288084f373..86b89b94f9f3d 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -530,7 +530,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); - val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 9a3122ffce03c..8df78adeee338 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -560,7 +560,6 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) mmu->cmdq.cons = 0; memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); - clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); mmu->evtq.prod = 0; mmu->evtq.cons = 0; @@ -877,8 +876,6 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) return NULL; - clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); - evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons); -- GitLab From b039f1c4d3727c3e44a7e89ff79e7e8533e1beec Mon Sep 17 00:00:00 2001 From: "Wachowski, Karol" Date: Fri, 26 Jan 2024 13:27:59 +0100 Subject: [PATCH 0720/1405] accel/ivpu: Correct MMU queue size checking functions Do not use kernel CIRC_SPACE and CIRC_CNT that incorrectly return space of a queue when wrap bit was set. Use correct implementation that compares producer, consumer and wrap bit values. Without this fix it was possible to lose events in case when event queue was full. Signed-off-by: Wachowski, Karol Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-3-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_mmu.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 8df78adeee338..91bd640655ab3 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -72,10 +72,10 @@ #define IVPU_MMU_Q_COUNT_LOG2 4 /* 16 entries */ #define IVPU_MMU_Q_COUNT ((u32)1 << IVPU_MMU_Q_COUNT_LOG2) -#define IVPU_MMU_Q_WRAP_BIT (IVPU_MMU_Q_COUNT << 1) -#define IVPU_MMU_Q_WRAP_MASK (IVPU_MMU_Q_WRAP_BIT - 1) -#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) +#define IVPU_MMU_Q_WRAP_MASK GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0) +#define IVPU_MMU_Q_IDX_MASK (IVPU_MMU_Q_COUNT - 1) #define IVPU_MMU_Q_IDX(val) ((val) & IVPU_MMU_Q_IDX_MASK) +#define IVPU_MMU_Q_WRP(val) ((val) & IVPU_MMU_Q_COUNT) #define IVPU_MMU_CMDQ_CMD_SIZE 16 #define IVPU_MMU_CMDQ_SIZE (IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE) @@ -475,20 +475,32 @@ static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) return 0; } +static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q) +{ + return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) && + (IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons))); +} + +static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q) +{ + return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) && + (IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons))); +} + static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) { - struct ivpu_mmu_queue *q = &vdev->mmu->cmdq; - u64 *queue_buffer = q->base; - int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); + struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + u64 *queue_buffer = cmdq->base; + int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer)); - if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) { + if (ivpu_mmu_queue_is_full(cmdq)) { ivpu_err(vdev, "Failed to write MMU CMD %s\n", name); return -EBUSY; } queue_buffer[idx] = data0; queue_buffer[idx + 1] = data1; - q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK; + cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK; ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1); @@ -873,12 +885,10 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); - if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) + if (ivpu_mmu_queue_is_empty(evtq)) return NULL; evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; - REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons); - return evt; } @@ -899,6 +909,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev) } ivpu_mmu_user_context_mark_invalid(vdev, ssid); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons); } } -- GitLab From a7f31091ddf457352e3dd7ac183fdbd26b4dcd04 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 26 Jan 2024 13:28:00 +0100 Subject: [PATCH 0721/1405] accel/ivpu: Disable d3hot_delay on all NPU generations NPU does not require this delay regardless of the generation. All generations are integrated into the SOC. Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-4-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 9418c73ee8ef8..4b06402269869 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -480,9 +480,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev) /* Clear any pending errors */ pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); - /* VPU 37XX does not require 10m D3hot delay */ - if (ivpu_hw_gen(vdev) == IVPU_HW_37XX) - pdev->d3hot_delay = 0; + /* NPU does not require 10m D3hot delay */ + pdev->d3hot_delay = 0; ret = pcim_enable_device(pdev); if (ret) { -- GitLab From a939c03d37788e4a9a645c88d5e5b6a475ba0fd5 Mon Sep 17 00:00:00 2001 From: Krystian Pradzynski Date: Fri, 26 Jan 2024 13:28:02 +0100 Subject: [PATCH 0722/1405] accel/ivpu/40xx: Enable D0i3 message All recent 40xx firmware already supports D0i3 entry message and this WA is no longer needed. Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-6-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_fw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 6576232f3e678..5fa8bd4603d5b 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -222,7 +222,6 @@ ivpu_fw_init_wa(struct ivpu_device *vdev) const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data; if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) || - (ivpu_hw_gen(vdev) > IVPU_HW_37XX) || (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE)) vdev->wa.disable_d0i3_msg = true; -- GitLab From 553099da45397914a995dce6307d6c26523c2567 Mon Sep 17 00:00:00 2001 From: Krystian Pradzynski Date: Fri, 26 Jan 2024 13:28:03 +0100 Subject: [PATCH 0723/1405] accel/ivpu/40xx: Stop passing SKU boot parameters to FW This parameter was never used by the 40xx FW. Signed-off-by: Krystian Pradzynski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-7-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 86b89b94f9f3d..1c995307c1138 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -704,7 +704,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) { struct ivpu_hw_info *hw = vdev->hw; u32 tile_disable; - u32 tile_enable; u32 fuse; fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE); @@ -725,10 +724,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) else ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM); - tile_enable = (~tile_disable) & TILE_MAX_MASK; - - hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku); - hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku); hw->tile_fuse = tile_disable; hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; -- GitLab From 5f8408aca66772d3aa9b4831577b2ac5ec41bcd9 Mon Sep 17 00:00:00 2001 From: Grzegorz Trzebiatowski Date: Fri, 26 Jan 2024 13:28:04 +0100 Subject: [PATCH 0724/1405] accel/ivpu: Add job status for jobs aborted by the driver Add DRM_IVPU_JOB_STATUS_ABORTED to indicate that the job was aborted by the driver due to e.g. TDR or user context MMU faults. This will help UMD and tests distinguish if job was aborted by the FW or the driver. Signed-off-by: Grzegorz Trzebiatowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240126122804.2169129-8-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 4 ++-- include/uapi/drm/ivpu_accel.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 0440bee3ecafd..e70cfb8593390 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -294,7 +294,7 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 return -ENOENT; if (job->file_priv->has_mmu_faults) - job_status = VPU_JSM_STATUS_ABORTED; + job_status = DRM_IVPU_JOB_STATUS_ABORTED; job->bos[CMD_BUF_IDX]->job_status = job_status; dma_fence_signal(job->done_fence); @@ -315,7 +315,7 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev) unsigned long id; xa_for_each(&vdev->submitted_jobs_xa, id, job) - ivpu_job_signal_and_destroy(vdev, id, VPU_JSM_STATUS_ABORTED); + ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED); } static int ivpu_job_submit(struct ivpu_job *job) diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 63c49318a8630..19a13468eca5e 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -305,6 +305,7 @@ struct drm_ivpu_submit { /* drm_ivpu_bo_wait job status codes */ #define DRM_IVPU_JOB_STATUS_SUCCESS 0 +#define DRM_IVPU_JOB_STATUS_ABORTED 256 /** * struct drm_ivpu_bo_wait - Wait for BO to become inactive -- GitLab From 1ce2654d87e2fb91fea83b288bd9b2641045e42a Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Sat, 3 Feb 2024 13:31:33 +0800 Subject: [PATCH 0725/1405] net: stmmac: xgmac: fix a typo of register name in DPP safety handling DDPP is copied from Synopsys Data book: DDPP: Disable Data path Parity Protection. When it is 0x0, Data path Parity Protection is enabled. When it is 0x1, Data path Parity Protection is disabled. The macro name should be XGMAC_DPP_DISABLE. Fixes: 46eba193d04f ("net: stmmac: xgmac: fix handling of DPP safety error for DMA channels") Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20240203053133.1129236-1-0x1207@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 5c67a3f89f088..6a2c7d22df1eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -304,7 +304,7 @@ #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc #define XGMAC_MTL_DPP_CONTROL 0x000010e0 -#define XGMAC_DDPP_DISABLE BIT(0) +#define XGMAC_DPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 04d7c4dc2e35f..323c57f03c93c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -928,7 +928,7 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, /* 5. Enable Data Path Parity Protection */ value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); /* already enabled by default, explicit enable it again */ - value &= ~XGMAC_DDPP_DISABLE; + value &= ~XGMAC_DPP_DISABLE; writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); return 0; -- GitLab From 720e78d7fa0f1df905a42ae68e7e3b0f95e53946 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 31 Jan 2024 11:24:59 +0300 Subject: [PATCH 0726/1405] serial: 8250_pci1xxxx: partially revert off by one patch I was reviewing this code again and I realized I made a mistake here. It should have been > instead of >=. The subtract ensures that we don't go out of bounds. My patch meant that we don't read the last chunk of the buffer. Fixes: 86ee55e9bc7f ("serial: 8250_pci1xxxx: fix off by one in pci1xxxx_process_read_data()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/bd6fb361-bbb9-427d-90e8-a5df4de76221@moroto.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c index cd258922bd780..2dda737b1660b 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -302,7 +302,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, * to read, the data is received one byte at a time. */ while (valid_burst_count--) { - if (*buff_index >= (RX_BUF_SIZE - UART_BURST_SIZE)) + if (*buff_index > (RX_BUF_SIZE - UART_BURST_SIZE)) break; burst_buf = (u32 *)&rx_buff[*buff_index]; *burst_buf = readl(port->membase + UART_RX_BURST_FIFO); -- GitLab From 3ee07964d407411fd578a3bc998de44fd64d266a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 1 Feb 2024 11:55:56 +0100 Subject: [PATCH 0727/1405] serial: core: introduce uart_port_tx_flags() And an enum with a flag: UART_TX_NOSTOP. To NOT call __port->ops->stop_tx() when the circular buffer is empty. mxs-uart needs this (see the next patch). Signed-off-by: "Jiri Slaby (SUSE)" Cc: stable Tested-by: Emil Kronborg Link: https://lore.kernel.org/r/20240201105557.28043-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 536b2581d3e20..55b1f3ba48ac1 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -748,8 +748,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ -- GitLab From 7be50f2e8f20fc2299069b28dea59a28e3abe20a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 1 Feb 2024 11:55:57 +0100 Subject: [PATCH 0728/1405] serial: mxs-auart: fix tx Emil reports: After updating Linux on an i.MX28 board, serial communication over AUART broke. When I TX from the board and measure on the TX pin, it seems like the HW fifo is not emptied before the transmission is stopped. MXS performs weird things with stop_tx(). The driver makes it conditional on uart_tx_stopped(). So the driver needs special handling. Pass the brand new UART_TX_NOSTOP to uart_port_tx_flags() and handle the stop on its own. Signed-off-by: "Jiri Slaby (SUSE)" Reported-by: Emil Kronborg Cc: stable Fixes: 2d141e683e9a ("tty: serial: use uart_port_tx() helper") Closes: https://lore.kernel.org/all/miwgbnvy3hjpnricubg76ytpn7xoceehwahupy25bubbduu23s@om2lptpa26xw/ Tested-by: Stefan Wahren Tested-by: Emil Kronborg Link: https://lore.kernel.org/r/20240201105557.28043-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 3ec725555bcc1..4749331fe618c 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -605,13 +605,16 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s) return; } - pending = uart_port_tx(&s->port, ch, + pending = uart_port_tx_flags(&s->port, ch, UART_TX_NOSTOP, !(mxs_read(s, REG_STAT) & AUART_STAT_TXFF), mxs_write(ch, s, REG_DATA)); if (pending) mxs_set(AUART_INTR_TXIEN, s, REG_INTR); else mxs_clr(AUART_INTR_TXIEN, s, REG_INTR); + + if (uart_tx_stopped(&s->port)) + mxs_auart_stop_tx(&s->port); } static void mxs_auart_rx_char(struct mxs_auart_port *s) -- GitLab From 411a20db905b44e18cc9129b745f1d5deba4eae5 Mon Sep 17 00:00:00 2001 From: Oleksandr Natalenko Date: Mon, 29 Jan 2024 17:49:31 +0100 Subject: [PATCH 0729/1405] HID: logitech-hidpp: Do not flood kernel log Since commit 680ee411a98e ("HID: logitech-hidpp: Fix connect event race") the following messages appear in the kernel log from time to time: logitech-hidpp-device 0003:046D:408A.0005: HID++ 4.5 device connected. logitech-hidpp-device 0003:046D:408A.0005: HID++ 4.5 device connected. logitech-hidpp-device 0003:046D:4051.0006: Disconnected logitech-hidpp-device 0003:046D:408A.0005: Disconnected As discussed, print the first per-device "device connected" message at info level, demoting subsequent messages to debug level. Also, demote the "Disconnected message" to debug level unconditionally. Link: https://lore.kernel.org/lkml/3277085.44csPzL39Z@natalenko.name/ Signed-off-by: Oleksandr Natalenko Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 6ef0c88e3e60a..d2f3f234f29de 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -203,6 +203,8 @@ struct hidpp_device { struct hidpp_scroll_counter vertical_wheel_counter; u8 wireless_feature_index; + + bool connected_once; }; /* HID++ 1.0 error codes */ @@ -988,8 +990,13 @@ static int hidpp_root_get_protocol_version(struct hidpp_device *hidpp) hidpp->protocol_minor = response.rap.params[1]; print_version: - hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n", - hidpp->protocol_major, hidpp->protocol_minor); + if (!hidpp->connected_once) { + hid_info(hidpp->hid_dev, "HID++ %u.%u device connected.\n", + hidpp->protocol_major, hidpp->protocol_minor); + hidpp->connected_once = true; + } else + hid_dbg(hidpp->hid_dev, "HID++ %u.%u device connected.\n", + hidpp->protocol_major, hidpp->protocol_minor); return 0; } @@ -4184,7 +4191,7 @@ static void hidpp_connect_event(struct work_struct *work) /* Get device version to check if it is connected */ ret = hidpp_root_get_protocol_version(hidpp); if (ret) { - hid_info(hidpp->hid_dev, "Disconnected\n"); + hid_dbg(hidpp->hid_dev, "Disconnected\n"); if (hidpp->battery.ps) { hidpp->battery.online = false; hidpp->battery.status = POWER_SUPPLY_STATUS_UNKNOWN; -- GitLab From c1d6708bf0d3dd976460d435373cf5abf21ce258 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Mon, 29 Jan 2024 14:35:45 -0800 Subject: [PATCH 0730/1405] HID: wacom: Do not register input devices until after hid_hw_start If a input device is opened before hid_hw_start is called, events may not be received from the hardware. In the case of USB-backed devices, for example, the hid_hw_start function is responsible for filling in the URB which is submitted when the input device is opened. If a device is opened prematurely, polling will never start because the device will not have been in the correct state to send the URB. Because the wacom driver registers its input devices before calling hid_hw_start, there is a window of time where a device can be opened and end up in an inoperable state. Some ARM-based Chromebooks in particular reliably trigger this bug. This commit splits the wacom_register_inputs function into two pieces. One which is responsible for setting up the allocated inputs (and runs prior to hid_hw_start so that devices are ready for any input events they may end up receiving) and another which only registers the devices (and runs after hid_hw_start to ensure devices can be immediately opened without issue). Note that the functions to initialize the LEDs and remotes are also moved after hid_hw_start to maintain their own dependency chains. Fixes: 7704ac937345 ("HID: wacom: implement generic HID handling for pen generic devices") Cc: stable@vger.kernel.org # v3.18+ Suggested-by: Dmitry Torokhov Signed-off-by: Jason Gerecke Tested-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 63 ++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index b613f11ed9498..2bc45b24075c3 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2087,7 +2087,7 @@ static int wacom_allocate_inputs(struct wacom *wacom) return 0; } -static int wacom_register_inputs(struct wacom *wacom) +static int wacom_setup_inputs(struct wacom *wacom) { struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; struct wacom_wac *wacom_wac = &(wacom->wacom_wac); @@ -2106,10 +2106,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pen_input_dev); wacom_wac->pen_input = NULL; pen_input_dev = NULL; - } else { - error = input_register_device(pen_input_dev); - if (error) - goto fail; } error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac); @@ -2118,10 +2114,6 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(touch_input_dev); wacom_wac->touch_input = NULL; touch_input_dev = NULL; - } else { - error = input_register_device(touch_input_dev); - if (error) - goto fail; } error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); @@ -2130,7 +2122,34 @@ static int wacom_register_inputs(struct wacom *wacom) input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; - } else { + } + + return 0; +} + +static int wacom_register_inputs(struct wacom *wacom) +{ + struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev; + struct wacom_wac *wacom_wac = &(wacom->wacom_wac); + int error = 0; + + pen_input_dev = wacom_wac->pen_input; + touch_input_dev = wacom_wac->touch_input; + pad_input_dev = wacom_wac->pad_input; + + if (pen_input_dev) { + error = input_register_device(pen_input_dev); + if (error) + goto fail; + } + + if (touch_input_dev) { + error = input_register_device(touch_input_dev); + if (error) + goto fail; + } + + if (pad_input_dev) { error = input_register_device(pad_input_dev); if (error) goto fail; @@ -2383,6 +2402,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) if (error) goto fail; + error = wacom_setup_inputs(wacom); + if (error) + goto fail; + + if (features->type == HID_GENERIC) + connect_mask |= HID_CONNECT_DRIVER; + + /* Regular HID work starts now */ + error = hid_hw_start(hdev, connect_mask); + if (error) { + hid_err(hdev, "hw start failed\n"); + goto fail; + } + error = wacom_register_inputs(wacom); if (error) goto fail; @@ -2397,16 +2430,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail; } - if (features->type == HID_GENERIC) - connect_mask |= HID_CONNECT_DRIVER; - - /* Regular HID work starts now */ - error = hid_hw_start(hdev, connect_mask); - if (error) { - hid_err(hdev, "hw start failed\n"); - goto fail; - } - if (!wireless) { /* Note that if query fails it is not a hard failure */ wacom_query_tablet_data(wacom); -- GitLab From a4ab7dedaee0e39b15653c5fd0367e420739f7ef Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:07 +0000 Subject: [PATCH 0731/1405] iommufd/iova_bitmap: Bounds check mapped::pages access Dirty IOMMU hugepages reported on a base page page-size granularity can lead to an attempt to set dirty pages in the bitmap beyond the limits that are pinned. Bounds check the page index of the array we are trying to access is within the limits before we kmap() and return otherwise. While it is also a defensive check, this is also in preparation to defer setting bits (outside the mapped range) to the next iteration(s) when the pages become available. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-2-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 0a92c9eeaf7f5..a3606b4c22292 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -409,6 +409,7 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; do { unsigned int page_idx = cur_bit / BITS_PER_PAGE; @@ -417,6 +418,9 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, last_bit - cur_bit + 1); void *kaddr; + if (unlikely(page_idx > last_page_idx)) + break; + kaddr = kmap_local_page(mapped->pages[page_idx]); bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); -- GitLab From d18411ec305728c6371806c4fb09be07016aad0b Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:08 +0000 Subject: [PATCH 0732/1405] iommufd/iova_bitmap: Switch iova_bitmap::bitmap to an u8 array iova_bitmap_mapped_length() don't deal correctly with the small bitmaps (< 2M bitmaps) when the starting address isn't u64 aligned, leading to skipping a tiny part of the IOVA range. This is materialized as not marking data dirty that should otherwise have been. Fix that by using a u8 * in the internal state of IOVA bitmap. Most of the data structures use the type of the bitmap to adjust its indexes, thus changing the type of the bitmap decreases the granularity of the bitmap indexes. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-3-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index a3606b4c22292..9d42ab51a6bb3 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -100,7 +100,7 @@ struct iova_bitmap { struct iova_bitmap_map mapped; /* userspace address of the bitmap */ - u64 __user *bitmap; + u8 __user *bitmap; /* u64 index that @mapped points to */ unsigned long mapped_base_index; @@ -162,7 +162,7 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) { struct iova_bitmap_map *mapped = &bitmap->mapped; unsigned long npages; - u64 __user *addr; + u8 __user *addr; long ret; /* @@ -247,7 +247,7 @@ struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, mapped = &bitmap->mapped; mapped->pgshift = __ffs(page_size); - bitmap->bitmap = data; + bitmap->bitmap = (u8 __user *)data; bitmap->mapped_total_index = iova_bitmap_offset_to_index(bitmap, length - 1) + 1; bitmap->iova = iova; @@ -304,7 +304,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - bytes / sizeof(*bitmap->bitmap)); + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); return remaining; } -- GitLab From 42af95114535dd94c39714b97ad720602d406b9a Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:09 +0000 Subject: [PATCH 0733/1405] iommufd/selftest: Test u64 unaligned bitmaps Exercise the dirty tracking bitmaps with byte unaligned addresses in addition to the PAGE_SIZE unaligned bitmaps, using a address towards the end of the page boundary. In doing so, increase the tailroom we allocate for the bitmap from MOCK_PAGE_SIZE(2K) into PAGE_SIZE(4K), such that we can test end of bitmap boundary. Link: https://lore.kernel.org/r/20240202133415.23819-4-joao.m.martins@oracle.com Signed-off-by: Joao Martins Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a881e7a21d1b..49774a720314e 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1741,9 +1741,9 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->bitmap_size = variant->buffer_size / self->page_size / BITS_PER_BYTE; - /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */ + /* Provision with an extra (PAGE_SIZE) for the unaligned case */ rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + MOCK_PAGE_SIZE); + self->bitmap_size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); @@ -1873,6 +1873,13 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, 0, _metadata); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + 0xff1, + self->bitmap_size, 0, _metadata); + + test_ioctl_destroy(stddev_id); test_ioctl_destroy(hwpt_id); } @@ -1907,6 +1914,14 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + 0xff1, + self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + test_ioctl_destroy(stddev_id); test_ioctl_destroy(hwpt_id); } -- GitLab From 2780025e01e2e1c92f83ee7da91d9727c2e58a3e Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:10 +0000 Subject: [PATCH 0734/1405] iommufd/iova_bitmap: Handle recording beyond the mapped pages IOVA bitmap is a zero-copy scheme of recording dirty bits that iterate the different bitmap user pages at chunks of a maximum of PAGE_SIZE/sizeof(struct page*) pages. When the iterations are split up into 64G, the end of the range may be broken up in a way that's aligned with a non base page PTE size. This leads to only part of the huge page being recorded in the bitmap. Note that in pratice this is only a problem for IOMMU dirty tracking i.e. when the backing PTEs are in IOMMU hugepages and the bitmap is in base page granularity. So far this not something that affects VF dirty trackers (which reports and records at the same granularity). To fix that, if there is a remainder of bits left to set in which the current IOVA bitmap doesn't cover, make a copy of the bitmap structure and iterate-and-set the rest of the bits remaining. Finally, when advancing the iterator, skip all the bits that were set ahead. Link: https://lore.kernel.org/r/20240202133415.23819-5-joao.m.martins@oracle.com Reported-by: Avihai Horon Fixes: f35f22cc760e ("iommu/vt-d: Access/Dirty bit support for SS domains") Fixes: 421a511a293f ("iommu/amd: Access/Dirty bit support in IOPTEs") Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 9d42ab51a6bb3..b370e8ee88665 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -113,6 +113,9 @@ struct iova_bitmap { /* length of the IOVA range for the whole bitmap */ size_t length; + + /* length of the IOVA range set ahead the pinned pages */ + unsigned long set_ahead_length; }; /* @@ -341,6 +344,32 @@ static bool iova_bitmap_done(struct iova_bitmap *bitmap) return bitmap->mapped_base_index >= bitmap->mapped_total_index; } +static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, + size_t set_ahead_length) +{ + int ret = 0; + + while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { + unsigned long length = iova_bitmap_mapped_length(bitmap); + unsigned long iova = iova_bitmap_mapped_iova(bitmap); + + ret = iova_bitmap_get(bitmap); + if (ret) + break; + + length = min(length, set_ahead_length); + iova_bitmap_set(bitmap, iova, length); + + set_ahead_length -= length; + bitmap->mapped_base_index += + iova_bitmap_offset_to_index(bitmap, length - 1) + 1; + iova_bitmap_put(bitmap); + } + + bitmap->set_ahead_length = 0; + return ret; +} + /* * Advances to the next range, releases the current pinned * pages and pins the next set of bitmap pages. @@ -357,6 +386,15 @@ static int iova_bitmap_advance(struct iova_bitmap *bitmap) if (iova_bitmap_done(bitmap)) return 0; + /* Iterate, set and skip any bits requested for next iteration */ + if (bitmap->set_ahead_length) { + int ret; + + ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); + if (ret) + return ret; + } + /* When advancing the index we pin the next set of bitmap pages */ return iova_bitmap_get(bitmap); } @@ -426,5 +464,10 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, kunmap_local(kaddr); cur_bit += nbits; } while (cur_bit <= last_bit); + + if (unlikely(cur_bit <= last_bit)) { + bitmap->set_ahead_length = + ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); + } } EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); -- GitLab From 407fc184f0e0bfde61026d6ce3fb1e70a15159a3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:11 +0000 Subject: [PATCH 0735/1405] iommufd/selftest: Refactor dirty bitmap tests Rework the functions that test and set the bitmaps to receive a new parameter (the pte_page_size) that reflects the expected PTE size in the page tables. The same scheme is still used i.e. even bits are dirty and odd page indexes aren't dirty. Here it just refactors to consider the size of the PTE rather than hardcoded to IOMMU mock base page assumptions. While at it, refactor dirty bitmap tests to use the idev_id created by the fixture instead of creating a new one. This is in preparation for doing tests with IOMMU hugepages where multiple bits set as part of recording a whole hugepage as dirty and thus the pte_page_size will vary depending on io hugepages or io base pages. Link: https://lore.kernel.org/r/20240202133415.23819-6-joao.m.martins@oracle.com Signed-off-by: Joao Martins Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 28 ++++++------- tools/testing/selftests/iommu/iommufd_utils.h | 39 ++++++++++++------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 49774a720314e..56c3e511a0ab7 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1849,7 +1849,7 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; @@ -1859,34 +1859,31 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, 0, _metadata); /* PAGE_SIZE unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, 0, _metadata); /* u64 unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, - self->bitmap + 0xff1, - self->bitmap_size, 0, _metadata); - + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, 0, + _metadata); - test_ioctl_destroy(stddev_id); test_ioctl_destroy(hwpt_id); } TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; @@ -1896,19 +1893,18 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); /* Unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, @@ -1916,13 +1912,11 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) /* u64 unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, - self->bitmap + 0xff1, - self->bitmap_size, + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); - test_ioctl_destroy(stddev_id); test_ioctl_destroy(hwpt_id); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index c646264aa41fd..8d2b46b2114da 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -344,16 +344,19 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, page_size, bitmap, nr)) static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, - __u64 iova, size_t page_size, __u64 *bitmap, + __u64 iova, size_t page_size, + size_t pte_page_size, __u64 *bitmap, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; - unsigned long nr = nbits / 2; + unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; + unsigned long nbits = bitmap_size * BITS_PER_BYTE; + unsigned long j, i, nr = nbits / pteset ?: 1; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (i = 0; i < nbits; i += 2) + memset(bitmap, 0, bitmap_size); + for (i = 0; i < nbits; i += pteset) set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, @@ -365,8 +368,12 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ(j < npte, + test_bit(i + j, (unsigned long *)bitmap)); + } + ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap)); } memset(bitmap, 0, bitmap_size); @@ -374,19 +381,23 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, flags); /* It as read already -- expect all zeroes */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2) && (flags & - IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), - test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ( + (j < npte) && + (flags & + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), + test_bit(i + j, (unsigned long *)bitmap)); + } } return 0; } -#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \ - bitmap_size, flags, _metadata) \ +#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\ + bitmap, bitmap_size, flags, _metadata) \ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \ - page_size, bitmap, bitmap_size, \ - flags, _metadata)) + page_size, pte_size, bitmap, \ + bitmap_size, flags, _metadata)) static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) -- GitLab From 02a8c61a8b06a4a082b58c3e643b28036c6be60f Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:12 +0000 Subject: [PATCH 0736/1405] iommufd/selftest: Refactor mock_domain_read_and_clear_dirty() Move the clearing of the dirty bit of the mock domain into mock_domain_test_and_clear_dirty() helper, simplifying the caller function. Additionally, rework the mock_domain_read_and_clear_dirty() loop to iterate over a potentially variable IO page size. No functional change intended with the loop refactor. This is in preparation for dirty tracking support for IOMMU hugepage mock domains. Link: https://lore.kernel.org/r/20240202133415.23819-7-joao.m.martins@oracle.com Signed-off-by: Joao Martins Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 64 ++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d9e9920c7eba4..796e7e3ec0cfd 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -191,6 +191,34 @@ static int mock_domain_set_dirty_tracking(struct iommu_domain *domain, return 0; } +static bool mock_test_and_clear_dirty(struct mock_iommu_domain *mock, + unsigned long iova, size_t page_size, + unsigned long flags) +{ + unsigned long cur, end = iova + page_size - 1; + bool dirty = false; + void *ent, *old; + + for (cur = iova; cur < end; cur += MOCK_IO_PAGE_SIZE) { + ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); + if (!ent || !(xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) + continue; + + dirty = true; + /* Clear dirty */ + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { + unsigned long val; + + val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, cur / MOCK_IO_PAGE_SIZE, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + } + } + + return dirty; +} + static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, unsigned long iova, size_t size, unsigned long flags, @@ -198,31 +226,29 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, { struct mock_iommu_domain *mock = container_of(domain, struct mock_iommu_domain, domain); - unsigned long i, max = size / MOCK_IO_PAGE_SIZE; - void *ent, *old; + unsigned long end = iova + size; + void *ent; if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap) return -EINVAL; - for (i = 0; i < max; i++) { - unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE; + do { + unsigned long pgsize = MOCK_IO_PAGE_SIZE; + unsigned long head; - ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); - if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) { - /* Clear dirty */ - if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { - unsigned long val; - - val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; - old = xa_store(&mock->pfns, - cur / MOCK_IO_PAGE_SIZE, - xa_mk_value(val), GFP_KERNEL); - WARN_ON_ONCE(ent != old); - } - iommu_dirty_bitmap_record(dirty, cur, - MOCK_IO_PAGE_SIZE); + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + if (!ent) { + iova += pgsize; + continue; } - } + + head = iova & ~(pgsize - 1); + + /* Clear dirty */ + if (mock_test_and_clear_dirty(mock, head, pgsize, flags)) + iommu_dirty_bitmap_record(dirty, head, pgsize); + iova = head + pgsize; + } while (iova < end); return 0; } -- GitLab From 7db521e23fe9e36855b61b01a67291281118570e Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:13 +0000 Subject: [PATCH 0737/1405] iommufd/selftest: Hugepage mock domain support Add support to mock iommu hugepages of 1M (for a 2K mock io page size). To avoid breaking test suite defaults, the way this is done is by explicitly creating a iommu mock device which has hugepage support (i.e. through MOCK_FLAGS_DEVICE_HUGE_IOVA). The same scheme is maintained of mock base page index tracking in the XArray, except that an extra bit is added to mark it as a hugepage. One subpage containing the dirty bit, means that the whole hugepage is dirty (similar to AMD IOMMU non-standard page sizes). For clearing, same thing applies, and it must clear all dirty subpages. This is in preparation for dirty tracking to mark mock hugepages as dirty to exercise all the iova-bitmap fixes. Link: https://lore.kernel.org/r/20240202133415.23819-8-joao.m.martins@oracle.com Signed-off-by: Joao Martins Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iommufd_test.h | 1 + drivers/iommu/iommufd/selftest.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 482d4059f5db6..e854d3f672051 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -45,6 +45,7 @@ enum { enum { MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, + MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1, }; enum { diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 796e7e3ec0cfd..8abf9747773eb 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -41,6 +41,7 @@ static atomic_t mock_dev_num; enum { MOCK_DIRTY_TRACK = 1, MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, + MOCK_HUGE_PAGE_SIZE = 512 * MOCK_IO_PAGE_SIZE, /* * Like a real page table alignment requires the low bits of the address @@ -53,6 +54,7 @@ enum { MOCK_PFN_START_IOVA = _MOCK_PFN_START, MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1, + MOCK_PFN_HUGE_IOVA = _MOCK_PFN_START << 2, }; /* @@ -242,6 +244,8 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, continue; } + if (xa_to_value(ent) & MOCK_PFN_HUGE_IOVA) + pgsize = MOCK_HUGE_PAGE_SIZE; head = iova & ~(pgsize - 1); /* Clear dirty */ @@ -260,6 +264,7 @@ const struct iommu_dirty_ops dirty_ops = { static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) { + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); struct mock_iommu_domain *mock; mock = kzalloc(sizeof(*mock), GFP_KERNEL); @@ -268,6 +273,8 @@ static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) mock->domain.geometry.aperture_start = MOCK_APERTURE_START; mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) + mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; mock->domain.ops = mock_ops.default_domain_ops; mock->domain.type = IOMMU_DOMAIN_UNMANAGED; xa_init(&mock->pfns); @@ -313,7 +320,7 @@ mock_domain_alloc_user(struct device *dev, u32 flags, return ERR_PTR(-EOPNOTSUPP); if (user_data || (has_dirty_flag && no_dirty_ops)) return ERR_PTR(-EOPNOTSUPP); - domain = mock_domain_alloc_paging(NULL); + domain = mock_domain_alloc_paging(dev); if (!domain) return ERR_PTR(-ENOMEM); if (has_dirty_flag) @@ -376,6 +383,9 @@ static int mock_domain_map_pages(struct iommu_domain *domain, if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) flags = MOCK_PFN_LAST_IOVA; + if (pgsize != MOCK_IO_PAGE_SIZE) { + flags |= MOCK_PFN_HUGE_IOVA; + } old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE, xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) | flags), @@ -630,7 +640,8 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) struct mock_dev *mdev; int rc; - if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY)) + if (dev_flags & + ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA)) return ERR_PTR(-EINVAL); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); -- GitLab From fe13166f0562117c42fc60a109e664a914523e63 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:14 +0000 Subject: [PATCH 0738/1405] iommufd/selftest: Add mock IO hugepages tests Leverage previously added MOCK_FLAGS_DEVICE_HUGE_IOVA flag to create an IOMMU domain with more than MOCK_IO_PAGE_SIZE supported. Plumb the hugetlb backing memory for buffer allocation and change the expected page size to MOCK_HUGE_PAGE_SIZE (1M) when hugepage variant test cases are used. These so far are limited to 128M and 256M IOVA range tests cases which is when 1M hugepages can be used. Link: https://lore.kernel.org/r/20240202133415.23819-9-joao.m.martins@oracle.com Signed-off-by: Joao Martins Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd.c | 45 +++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 56c3e511a0ab7..edf1c99c9936c 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -12,6 +12,7 @@ static unsigned long HUGEPAGE_SIZE; #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) +#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE) static unsigned long get_huge_page_size(void) { @@ -1716,10 +1717,12 @@ FIXTURE(iommufd_dirty_tracking) FIXTURE_VARIANT(iommufd_dirty_tracking) { unsigned long buffer_size; + bool hugepages; }; FIXTURE_SETUP(iommufd_dirty_tracking) { + int mmap_flags; void *vrc; int rc; @@ -1732,9 +1735,17 @@ FIXTURE_SETUP(iommufd_dirty_tracking) variant->buffer_size, rc); } + mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available. + */ + mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + mmap_flags, -1, 0); assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; @@ -1749,8 +1760,16 @@ FIXTURE_SETUP(iommufd_dirty_tracking) assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); test_ioctl_ioas_alloc(&self->ioas_id); - test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, - &self->idev_id); + /* Enable 1M mock IOMMU hugepages */ + if (variant->hugepages) { + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_HUGE_IOVA, + &self->stdev_id, &self->hwpt_id, + &self->idev_id); + } else { + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, + &self->hwpt_id, &self->idev_id); + } } FIXTURE_TEARDOWN(iommufd_dirty_tracking) @@ -1784,12 +1803,26 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) .buffer_size = 128UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +{ + /* 4K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, + .hugepages = true, +}; + FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) { /* 8K bitmap (256M IOVA range) */ .buffer_size = 256UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +{ + /* 8K bitmap (256M IOVA range) */ + .buffer_size = 256UL * 1024UL * 1024UL, + .hugepages = true, +}; + TEST_F(iommufd_dirty_tracking, enforce_dirty) { uint32_t ioas_id, stddev_id, idev_id; @@ -1853,6 +1886,9 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); @@ -1887,6 +1923,9 @@ TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); -- GitLab From 4bbcbc6ea2fa379632a24c14cfb47aa603816ac6 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Fri, 2 Feb 2024 13:34:15 +0000 Subject: [PATCH 0739/1405] iommufd/iova_bitmap: Consider page offset for the pages to be pinned For small bitmaps that aren't PAGE_SIZE aligned *and* that are less than 512 pages in bitmap length, use an extra page to be able to cover the entire range e.g. [1M..3G] which would be iterated more efficiently in a single iteration, rather than two. Fixes: b058ea3ab5af ("vfio/iova_bitmap: refactor iova_bitmap_set() to better handle page boundaries") Link: https://lore.kernel.org/r/20240202133415.23819-10-joao.m.martins@oracle.com Signed-off-by: Joao Martins Tested-by: Avihai Horon Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index b370e8ee88665..db8c46bee1559 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -178,18 +178,19 @@ static int iova_bitmap_get(struct iova_bitmap *bitmap) bitmap->mapped_base_index) * sizeof(*bitmap->bitmap), PAGE_SIZE); - /* - * We always cap at max number of 'struct page' a base page can fit. - * This is, for example, on x86 means 2M of bitmap data max. - */ - npages = min(npages, PAGE_SIZE / sizeof(struct page *)); - /* * Bitmap address to be pinned is calculated via pointer arithmetic * with bitmap u64 word index. */ addr = bitmap->bitmap + bitmap->mapped_base_index; + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + ret = pin_user_pages_fast((unsigned long)addr, npages, FOLL_WRITE, mapped->pages); if (ret <= 0) -- GitLab From 853b8d7597eea4ccaaefbcf0942cd42fc86d542a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 2 Feb 2024 12:22:58 +0200 Subject: [PATCH 0740/1405] remap_range: merge do_clone_file_range() into vfs_clone_file_range() commit dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") moved the permission hooks from do_clone_file_range() out to its caller vfs_clone_file_range(), but left all the fast sanity checks in do_clone_file_range(). This makes the expensive security hooks be called in situations that they would not have been called before (e.g. fs does not support clone). The only reason for the do_clone_file_range() helper was that overlayfs did not use to be able to call vfs_clone_file_range() from copy up context with sb_writers lock held. However, since commit c63e56a4a652 ("ovl: do not open/llseek lower file with upper sb_writers held"), overlayfs just uses an open coded version of vfs_clone_file_range(). Merge_clone_file_range() into vfs_clone_file_range(), restoring the original order of checks as it was before the regressing commit and adapt the overlayfs code to call vfs_clone_file_range() before the permission hooks that were added by commit ca7ab482401c ("ovl: add permission hooks outside of do_splice_direct()"). Note that in the merge of do_clone_file_range(), the file_start_write() context was reduced to cover ->remap_file_range() without holding it over the permission hooks, which was the reason for doing the regressing commit in the first place. Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401312229.eddeb9a6-oliver.sang@intel.com Fixes: dfad37051ade ("remap_range: move permission hooks out of do_clone_file_range()") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20240202102258.1582671-1-amir73il@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/overlayfs/copy_up.c | 14 ++++++-------- fs/remap_range.c | 31 +++++++++---------------------- include/linux/fs.h | 3 --- 3 files changed, 15 insertions(+), 33 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b8e25ca51016d..8586e2f5d2439 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -265,20 +265,18 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, if (IS_ERR(old_file)) return PTR_ERR(old_file); + /* Try to use clone_file_range to clone up within the same fs */ + cloned = vfs_clone_file_range(old_file, 0, new_file, 0, len, 0); + if (cloned == len) + goto out_fput; + + /* Couldn't clone, so now we try to copy the data */ error = rw_verify_area(READ, old_file, &old_pos, len); if (!error) error = rw_verify_area(WRITE, new_file, &new_pos, len); if (error) goto out_fput; - /* Try to use clone_file_range to clone up within the same fs */ - ovl_start_write(dentry); - cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); - ovl_end_write(dentry); - if (cloned == len) - goto out_fput; - /* Couldn't clone, so now we try to copy the data */ - /* Check if lower fs supports seek operation */ if (old_file->f_mode & FMODE_LSEEK) skip_hole = true; diff --git a/fs/remap_range.c b/fs/remap_range.c index f8c1120b8311f..de07f978ce3eb 100644 --- a/fs/remap_range.c +++ b/fs/remap_range.c @@ -373,9 +373,9 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_remap_file_range_prep); -loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) +loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + loff_t len, unsigned int remap_flags) { loff_t ret; @@ -391,23 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, if (!file_in->f_op->remap_file_range) return -EOPNOTSUPP; - ret = file_in->f_op->remap_file_range(file_in, pos_in, - file_out, pos_out, len, remap_flags); - if (ret < 0) - return ret; - - fsnotify_access(file_in); - fsnotify_modify(file_out); - return ret; -} -EXPORT_SYMBOL(do_clone_file_range); - -loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags) -{ - loff_t ret; - ret = remap_verify_area(file_in, pos_in, len, false); if (ret) return ret; @@ -417,10 +400,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, return ret; file_start_write(file_out); - ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len, - remap_flags); + ret = file_in->f_op->remap_file_range(file_in, pos_in, + file_out, pos_out, len, remap_flags); file_end_write(file_out); + if (ret < 0) + return ret; + fsnotify_access(file_in); + fsnotify_modify(file_out); return ret; } EXPORT_SYMBOL(vfs_clone_file_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a704951..023f37c607094 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2101,9 +2101,6 @@ int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *count, unsigned int remap_flags); -extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - loff_t len, unsigned int remap_flags); extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); -- GitLab From f814bdda774c183b0cc15ec8f3b6e7c6f4527ba5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 23 Jan 2024 18:58:26 +0100 Subject: [PATCH 0741/1405] blk-wbt: Fix detection of dirty-throttled tasks The detection of dirty-throttled tasks in blk-wbt has been subtly broken since its beginning in 2016. Namely if we are doing cgroup writeback and the throttled task is not in the root cgroup, balance_dirty_pages() will set dirty_sleep for the non-root bdi_writeback structure. However blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback structure. Thus detection of recently throttled tasks is not working in this case (we noticed this when we switched to cgroup v2 and suddently writeback was slow). Since blk-wbt has no easy way to get to proper bdi_writeback and furthermore its intention has always been to work on the whole device rather than on individual cgroups, just move the dirty_sleep timestamp from bdi_writeback to backing_dev_info. That fixes the checking for recently throttled task and saves memory for everybody as a bonus. CC: stable@vger.kernel.org Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20240123175826.21452-1-jack@suse.cz [axboe: fixup indentation errors] Signed-off-by: Jens Axboe --- block/blk-wbt.c | 4 ++-- include/linux/backing-dev-defs.h | 7 +++++-- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 5ba3cd574eacb..0c0e270a82650 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -163,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var) */ static bool wb_recent_wait(struct rq_wb *rwb) { - struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb; + struct backing_dev_info *bdi = rwb->rqos.disk->bdi; - return time_before(jiffies, wb->dirty_sleep + HZ); + return time_before(jiffies, bdi->last_bdp_sleep + HZ); } static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ae12696ec492c..2ad261082bba5 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -141,8 +141,6 @@ struct bdi_writeback { struct delayed_work dwork; /* work item used for writeback */ struct delayed_work bw_dwork; /* work item used for bandwidth estimate */ - unsigned long dirty_sleep; /* last wait */ - struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK @@ -179,6 +177,11 @@ struct backing_dev_info { * any dirty wbs, which is depended upon by bdi_has_dirty(). */ atomic_long_t tot_write_bandwidth; + /* + * Jiffies when last process was dirty throttled on this bdi. Used by + * blk-wbt. + */ + unsigned long last_bdp_sleep; struct bdi_writeback wb; /* the root writeback info for this bdi */ struct list_head wb_list; /* list of all wbs */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1e3447bccdb14..e039d05304dd9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn); - wb->dirty_sleep = jiffies; err = fprop_local_init_percpu(&wb->completions, gfp); if (err) @@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi) INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); init_waitqueue_head(&bdi->wb_waitq); + bdi->last_bdp_sleep = jiffies; return cgwb_bdi_init(bdi); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40a..cc37fa7f33641 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1921,7 +1921,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb, break; } __set_current_state(TASK_KILLABLE); - wb->dirty_sleep = now; + bdi->last_bdp_sleep = jiffies; io_schedule_timeout(pause); current->dirty_paused_when = now + pause; -- GitLab From ba58f873cdeec30b6da48e28dd5782c5a3e1371b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Feb 2024 15:18:31 -0800 Subject: [PATCH 0742/1405] KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test When finishing the final iteration of dirty_log_test testcase, set host_quit _before_ the final "continue" so that the vCPU worker doesn't run an extra iteration, and delete the hack-a-fix of an extra "continue" from the dirty ring testcase. This fixes a bug where the extra post to sem_vcpu_cont may not be consumed, which results in failures in subsequent runs of the testcases. The bug likely was missed during development as x86 supports only a single "guest mode", i.e. there aren't any subsequent testcases after the dirty ring test, because for_each_guest_mode() only runs a single iteration. For the regular dirty log testcases, letting the vCPU run one extra iteration is a non-issue as the vCPU worker waits on sem_vcpu_cont if and only if the worker is explicitly told to stop (vcpu_sync_stop_requested). But for the dirty ring test, which needs to periodically stop the vCPU to reap the dirty ring, letting the vCPU resume the guest _after_ the last iteration means the vCPU will get stuck without an extra "continue". However, blindly firing off an post to sem_vcpu_cont isn't guaranteed to be consumed, e.g. if the vCPU worker sees host_quit==true before resuming the guest. This results in a dangling sem_vcpu_cont, which leads to subsequent iterations getting out of sync, as the vCPU worker will continue on before the main task is ready for it to resume the guest, leading to a variety of asserts, e.g. ==== Test Assertion Failure ==== dirty_log_test.c:384: dirty_ring_vcpu_ring_full pid=14854 tid=14854 errno=22 - Invalid argument 1 0x00000000004033eb: dirty_ring_collect_dirty_pages at dirty_log_test.c:384 2 0x0000000000402d27: log_mode_collect_dirty_pages at dirty_log_test.c:505 3 (inlined by) run_test at dirty_log_test.c:802 4 0x0000000000403dc7: for_each_guest_mode at guest_modes.c:100 5 0x0000000000401dff: main at dirty_log_test.c:941 (discriminator 3) 6 0x0000ffff9be173c7: ?? ??:0 7 0x0000ffff9be1749f: ?? ??:0 8 0x000000000040206f: _start at ??:? Didn't continue vcpu even without ring full Alternatively, the test could simply reset the semaphores before each testcase, but papering over hacks with more hacks usually ends in tears. Reported-by: Shaoqin Huang Fixes: 84292e565951 ("KVM: selftests: Add dirty ring buffer test") Reviewed-by: Peter Xu Reviewed-by: Shaoqin Huang Link: https://lore.kernel.org/r/20240202231831.354848-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/dirty_log_test.c | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index babea97b31a43..eaad5b20854cc 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -415,12 +418,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -433,7 +430,6 @@ struct log_mode { uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -452,7 +448,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; unsigned long *bmap; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { @@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " -- GitLab From 6fd78beed0213655c16ef728af0caec6d5ae4c73 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 31 Jan 2024 14:27:28 -0800 Subject: [PATCH 0743/1405] KVM: selftests: Don't assert on exact number of 4KiB in dirty log split test Drop dirty_log_page_splitting_test's assertion that the number of 4KiB pages remains the same across dirty logging being enabled and disabled, as the test doesn't guarantee that mappings outside of the memslots being dirty logged are stable, e.g. KVM's mappings for code and pages in memslot0 can be zapped by things like NUMA balancing. To preserve the spirit of the check, assert that (a) the number of 4KiB pages after splitting is _at least_ the number of 4KiB pages across all memslots under test, and (b) the number of hugepages before splitting adds up to the number of pages across all memslots under test. (b) is a little tenuous as it relies on memslot0 being incompatible with transparent hugepages, but that holds true for now as selftests explicitly madvise() MADV_NOHUGEPAGE for memslot0 (__vm_create() unconditionally specifies the backing type as VM_MEM_SRC_ANONYMOUS). Reported-by: Yi Lai Reported-by: Tao Su Reviewed-by: Tao Su Link: https://lore.kernel.org/r/20240131222728.4100079-1-seanjc@google.com Signed-off-by: Sean Christopherson --- .../x86_64/dirty_log_page_splitting_test.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index 634c6bfcd5720..ee3b384b991c8 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused) uint64_t host_num_pages; uint64_t pages_per_slot; int i; - uint64_t total_4k_pages; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; struct kvm_page_stats stats_dirty_pass[ITERATIONS]; @@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / SLOTS; + TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); + TEST_ASSERT(!(host_num_pages % 512), + "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); @@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused) memstress_free_bitmaps(bitmaps, SLOTS); memstress_destroy_vm(vm); - /* Make assertions about the page counts. */ - total_4k_pages = stats_populated.pages_4k; - total_4k_pages += stats_populated.pages_2m * 512; - total_4k_pages += stats_populated.pages_1g * 512 * 512; + TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + + stats_populated.pages_1g * 512 * 512), host_num_pages); /* * Check that all huge pages were split. Since large pages can only @@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused) */ if (dirty_log_manual_caps) { TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); + TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_clear_pass[0].pages_4k); TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); } else { TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_dirty_logging_enabled.pages_4k); } /* * Once dirty logging is disabled and the vCPUs have touched all their - * memory again, the page counts should be the same as they were + * memory again, the hugepage counts should be the same as they were * right after initial population of memory. */ - TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); } -- GitLab From 82b143aeb169b8b55798d7d2063032e1a6ceeeb0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 5 Feb 2024 10:39:20 +0100 Subject: [PATCH 0744/1405] Revert "parisc: Only list existing CPUs in cpu_possible_mask" This reverts commit 0921244f6f4f0d05698b953fe632a99b38907226. It broke CPU hotplugging because it modifies the __cpu_possible_mask after bootup, so that it will be different than nr_cpu_ids, which then effictively breaks the workqueue setup code and triggers crashes when shutting down CPUs at runtime. Guenter was the first who noticed the wrong values in __cpu_possible_mask, since the cpumask Kunit tests were failig. Reverting this commit fixes both issues, but sadly brings back this uncritical runtime warning: register_cpu_capacity_sysctl: too early to get CPU4 device! Signed-off-by: Helge Deller Reported-by: Guenter Roeck Link: https://lkml.org/lkml/2024/2/4/146 Link: https://lore.kernel.org/lkml/Zb0mbHlIud_bqftx@slm.duckdns.org/t/ Cc: stable@vger.kernel.org # 6.0+ --- arch/parisc/kernel/processor.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index e95a977ba5f37..bf73562706b2e 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -172,7 +172,6 @@ static int __init processor_probe(struct parisc_device *dev) p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; - set_cpu_possible(cpuid, true); store_cpu_topology(cpuid); #ifdef CONFIG_SMP @@ -474,13 +473,6 @@ static struct parisc_driver cpu_driver __refdata = { */ void __init processor_init(void) { - unsigned int cpu; - reset_cpu_topology(); - - /* reset possible mask. We will mark those which are possible. */ - for_each_possible_cpu(cpu) - set_cpu_possible(cpu, false); - register_parisc_driver(&cpu_driver); } -- GitLab From 7ed4380009e96d9e9c605e12822e987b35b05648 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 7 Feb 2024 08:01:17 +0900 Subject: [PATCH 0745/1405] firewire: core: send bus reset promptly on gap count error If we are bus manager and the bus has inconsistent gap counts, send a bus reset immediately instead of trying to read the root node's config ROM first. Otherwise, we could spend a lot of time trying to read the config ROM but never succeeding. This eliminates a 50+ second delay before the FireWire bus is usable after a newly connected device is powered on in certain circumstances. The delay occurs if a gap count inconsistency occurs, we are not the root node, and we become bus manager. One scenario that causes this is with a TI XIO2213B OHCI, the first time a Sony DSR-25 is powered on after being connected to the FireWire cable. In this configuration, the Linux box will not receive the initial PHY configuration packet sent by the DSR-25 as IRM, resulting in the DSR-25 having a gap count of 44 while the Linux box has a gap count of 63. FireWire devices have a gap count parameter, which is set to 63 on power-up and can be changed with a PHY configuration packet. This determines the duration of the subaction and arbitration gaps. For reliable communication, all nodes on a FireWire bus must have the same gap count. A node may have zero or more of the following roles: root node, bus manager (BM), isochronous resource manager (IRM), and cycle master. Unless a root node was forced with a PHY configuration packet, any node might become root node after a bus reset. Only the root node can become cycle master. If the root node is not cycle master capable, the BM or IRM should force a change of root node. After a bus reset, each node sends a self-ID packet, which contains its current gap count. A single bus reset does not change the gap count, but two bus resets in a row will set the gap count to 63. Because a consistent gap count is required for reliable communication, IEEE 1394a-2000 requires that the bus manager generate a bus reset if it detects that the gap count is inconsistent. When the gap count is inconsistent, build_tree() will notice this after the self identification process. It will set card->gap_count to the invalid value 0. If we become bus master, this will force bm_work() to send a bus reset when it performs gap count optimization. After a bus reset, there is no bus manager. We will almost always try to become bus manager. Once we become bus manager, we will first determine whether the root node is cycle master capable. Then, we will determine if the gap count should be changed. If either the root node or the gap count should be changed, we will generate a bus reset. To determine if the root node is cycle master capable, we read its configuration ROM. bm_work() will wait until we have finished trying to read the configuration ROM. However, an inconsistent gap count can make this take a long time. read_config_rom() will read the first few quadlets from the config ROM. Due to the gap count inconsistency, eventually one of the reads will time out. When read_config_rom() fails, fw_device_init() calls it again until MAX_RETRIES is reached. This takes 50+ seconds. Once we give up trying to read the configuration ROM, bm_work() will wake up, assume that the root node is not cycle master capable, and do a bus reset. Hopefully, this will resolve the gap count inconsistency. This change makes bm_work() check for an inconsistent gap count before waiting for the root node's configuration ROM. If the gap count is inconsistent, bm_work() will immediately do a bus reset. This eliminates the 50+ second delay and rapidly brings the bus to a working state. I considered that if the gap count is inconsistent, a PHY configuration packet might not be successful, so it could be desirable to skip the PHY configuration packet before the bus reset in this case. However, IEEE 1394a-2000 and IEEE 1394-2008 say that the bus manager may transmit a PHY configuration packet before a bus reset when correcting a gap count error. Since the standard endorses this, I decided it's safe to retain the PHY configuration packet transmission. Normally, after a topology change, we will reset the bus a maximum of 5 times to change the root node and perform gap count optimization. However, if there is a gap count inconsistency, we must always generate a bus reset. Otherwise the gap count inconsistency will persist and communication will be unreliable. For that reason, if there is a gap count inconstency, we generate a bus reset even if we already reached the 5 reset limit. Signed-off-by: Adam Goldman Reference: https://sourceforge.net/p/linux1394/mailman/message/58727806/ Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 6ac5ff20a2fe2..8aaa7fcb2630d 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation; - if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root. -- GitLab From 829388b725f8d266ccec32a2f446717d8693eaba Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 1 Feb 2024 14:04:36 +0800 Subject: [PATCH 0746/1405] kunit: device: Unregister the kunit_bus on shutdown If KUnit is built as a module, and it's unloaded, the kunit_bus is not unregistered. This causes an error if it's then re-loaded later, as we try to re-register the bus. Unregister the bus and root_device on shutdown, if it looks valid. In addition, be more specific about the value of kunit_bus_device. It is: - a valid struct device* if the kunit_bus initialised correctly. - an ERR_PTR if it failed to initialise. - NULL before initialisation and after shutdown. Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Signed-off-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Shuah Khan --- lib/kunit/device-impl.h | 2 ++ lib/kunit/device.c | 14 ++++++++++++++ lib/kunit/test.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/lib/kunit/device-impl.h b/lib/kunit/device-impl.h index 54bd558364053..5fcd48ff0f36a 100644 --- a/lib/kunit/device-impl.h +++ b/lib/kunit/device-impl.h @@ -13,5 +13,7 @@ // For internal use only -- registers the kunit_bus. int kunit_bus_init(void); +// For internal use only -- unregisters the kunit_bus. +void kunit_bus_shutdown(void); #endif //_KUNIT_DEVICE_IMPL_H diff --git a/lib/kunit/device.c b/lib/kunit/device.c index 074c6dd2e36a7..644a38a1f5b1c 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -54,6 +54,20 @@ int kunit_bus_init(void) return error; } +/* Unregister the 'kunit_bus' in case the KUnit module is unloaded. */ +void kunit_bus_shutdown(void) +{ + /* Make sure the bus exists before we unregister it. */ + if (IS_ERR_OR_NULL(kunit_bus_device)) + return; + + bus_unregister(&kunit_bus_type); + + root_device_unregister(kunit_bus_device); + + kunit_bus_device = NULL; +} + /* Release a 'fake' KUnit device. */ static void kunit_device_release(struct device *d) { diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 31a5a992e6467..1d1475578515c 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -928,6 +928,9 @@ static void __exit kunit_exit(void) #ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); #endif + + kunit_bus_shutdown(); + kunit_debugfs_cleanup(); } module_exit(kunit_exit); -- GitLab From 97cf301fa42e8ea6e0a24de97bc0abcdc87d9504 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 28 Jan 2024 13:04:05 +0100 Subject: [PATCH 0747/1405] riscv: Flush the tlb when a page directory is freed The riscv privileged specification mandates to flush the TLB whenever a page directory is modified, so add that to tlb_flush(). Fixes: c5e9b2c2ae82 ("riscv: Improve tlb_flush()") Signed-off-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240128120405.25876-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/tlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 1eb5682b2af60..50b63b5c15bd8 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -16,7 +16,7 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { #ifdef CONFIG_MMU - if (tlb->fullmm || tlb->need_flush_all) + if (tlb->fullmm || tlb->need_flush_all || tlb->freed_tables) flush_tlb_mm(tlb->mm); else flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, -- GitLab From 1279f9d9dec2d7462823a18c29ad61359e0a007d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 3 Feb 2024 10:31:49 -0800 Subject: [PATCH 0748/1405] af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC. syzbot reported a warning [0] in __unix_gc() with a repro, which creates a socketpair and sends one socket's fd to itself using the peer. socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0 sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\360", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], msg_controllen=24, msg_flags=0}, MSG_OOB|MSG_PROBE|MSG_DONTWAIT|MSG_ZEROCOPY) = 1 This forms a self-cyclic reference that GC should finally untangle but does not due to lack of MSG_OOB handling, resulting in memory leak. Recently, commit 11498715f266 ("af_unix: Remove io_uring code for GC.") removed io_uring's dead code in GC and revealed the problem. The code was executed at the final stage of GC and unconditionally moved all GC candidates from gc_candidates to gc_inflight_list. That papered over the reported problem by always making the following WARN_ON_ONCE(!list_empty(&gc_candidates)) false. The problem has been there since commit 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") added full scm support for MSG_OOB while fixing another bug. To fix this problem, we must call kfree_skb() for unix_sk(sk)->oob_skb if the socket still exists in gc_candidates after purging collected skb. Then, we need to set NULL to oob_skb before calling kfree_skb() because it calls last fput() and triggers unix_release_sock(), where we call duplicate kfree_skb(u->oob_skb) if not NULL. Note that the leaked socket remained being linked to a global list, so kmemleak also could not detect it. We need to check /proc/net/protocol to notice the unfreed socket. [0]: WARNING: CPU: 0 PID: 2863 at net/unix/garbage.c:345 __unix_gc+0xc74/0xe80 net/unix/garbage.c:345 Modules linked in: CPU: 0 PID: 2863 Comm: kworker/u4:11 Not tainted 6.8.0-rc1-syzkaller-00583-g1701940b1a02 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__unix_gc+0xc74/0xe80 net/unix/garbage.c:345 Code: 8b 5c 24 50 e9 86 f8 ff ff e8 f8 e4 22 f8 31 d2 48 c7 c6 30 6a 69 89 4c 89 ef e8 97 ef ff ff e9 80 f9 ff ff e8 dd e4 22 f8 90 <0f> 0b 90 e9 7b fd ff ff 48 89 df e8 5c e7 7c f8 e9 d3 f8 ff ff e8 RSP: 0018:ffffc9000b03fba0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffffc9000b03fc10 RCX: ffffffff816c493e RDX: ffff88802c02d940 RSI: ffffffff896982f3 RDI: ffffc9000b03fb30 RBP: ffffc9000b03fce0 R08: 0000000000000001 R09: fffff52001607f66 R10: 0000000000000003 R11: 0000000000000002 R12: dffffc0000000000 R13: ffffc9000b03fc10 R14: ffffc9000b03fc10 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005559c8677a60 CR3: 000000000d57a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0x889/0x15e0 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x8b9/0x12a0 kernel/workqueue.c:2787 kthread+0x2c6/0x3b0 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Reported-by: syzbot+fa3ef895554bdbfd1183@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fa3ef895554bdbfd1183 Fixes: 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240203183149.63573-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/unix/garbage.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2405f0f9af31c..8f63f0b4bf012 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -314,6 +314,17 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + list_for_each_entry_safe(u, next, &gc_candidates, link) { + struct sk_buff *skb = u->oob_skb; + + if (skb) { + u->oob_skb = NULL; + kfree_skb(skb); + } + } +#endif + spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to -- GitLab From 58086721b7781c3e35b19c9b78c8f5a791070ba3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 5 Feb 2024 18:11:14 +0100 Subject: [PATCH 0749/1405] devlink: avoid potential loop in devlink_rel_nested_in_notify_work() In case devlink_rel_nested_in_notify_work() can not take the devlink lock mutex. Convert the work to delayed work and in case of reschedule do it jiffie later and avoid potential looping. Suggested-by: Paolo Abeni Fixes: c137743bce02 ("devlink: introduce object and nested devlink relationship infra") Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20240205171114.338679-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 4275a2bc6d8e0..6a58342752b46 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -46,7 +46,7 @@ struct devlink_rel { u32 obj_index; devlink_rel_notify_cb_t *notify_cb; devlink_rel_cleanup_cb_t *cleanup_cb; - struct work_struct notify_work; + struct delayed_work notify_work; } nested_in; }; @@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel) static void devlink_rel_nested_in_notify_work(struct work_struct *work) { struct devlink_rel *rel = container_of(work, struct devlink_rel, - nested_in.notify_work); + nested_in.notify_work.work); struct devlink *devlink; devlink = devlinks_xa_get(rel->nested_in.devlink_index); @@ -96,13 +96,13 @@ static void devlink_rel_nested_in_notify_work(struct work_struct *work) return; reschedule_work: - schedule_work(&rel->nested_in.notify_work); + schedule_delayed_work(&rel->nested_in.notify_work, 1); } static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel) { __devlink_rel_get(rel); - schedule_work(&rel->nested_in.notify_work); + schedule_delayed_work(&rel->nested_in.notify_work, 0); } static struct devlink_rel *devlink_rel_alloc(void) @@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void) } refcount_set(&rel->refcount, 1); - INIT_WORK(&rel->nested_in.notify_work, - &devlink_rel_nested_in_notify_work); + INIT_DELAYED_WORK(&rel->nested_in.notify_work, + &devlink_rel_nested_in_notify_work); return rel; } -- GitLab From cb88cb53badb8aeb3955ad6ce80b07b598e310b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Feb 2024 17:10:04 +0000 Subject: [PATCH 0750/1405] ppp_async: limit MRU to 64K syzbot triggered a warning [1] in __alloc_pages(): WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp) Willem fixed a similar issue in commit c0a2a1b0d631 ("ppp: limit MRU to 64K") Adopt the same sanity check for ppp_async_ioctl(PPPIOCSMRU) [1]: WARNING: CPU: 1 PID: 11 at mm/page_alloc.c:4543 __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 Modules linked in: CPU: 1 PID: 11 Comm: kworker/u4:0 Not tainted 6.8.0-rc2-syzkaller-g41bccc98fb79 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Workqueue: events_unbound flush_to_ldisc pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 lr : __alloc_pages+0xc8/0x698 mm/page_alloc.c:4537 sp : ffff800093967580 x29: ffff800093967660 x28: ffff8000939675a0 x27: dfff800000000000 x26: ffff70001272ceb4 x25: 0000000000000000 x24: ffff8000939675c0 x23: 0000000000000000 x22: 0000000000060820 x21: 1ffff0001272ceb8 x20: ffff8000939675e0 x19: 0000000000000010 x18: ffff800093967120 x17: ffff800083bded5c x16: ffff80008ac97500 x15: 0000000000000005 x14: 1ffff0001272cebc x13: 0000000000000000 x12: 0000000000000000 x11: ffff70001272cec1 x10: 1ffff0001272cec0 x9 : 0000000000000001 x8 : ffff800091c91000 x7 : 0000000000000000 x6 : 000000000000003f x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000020 x2 : 0000000000000008 x1 : 0000000000000000 x0 : ffff8000939675e0 Call trace: __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] __kmalloc_large_node+0xbc/0x1fc mm/slub.c:3926 __do_kmalloc_node mm/slub.c:3969 [inline] __kmalloc_node_track_caller+0x418/0x620 mm/slub.c:4001 kmalloc_reserve+0x17c/0x23c net/core/skbuff.c:590 __alloc_skb+0x1c8/0x3d8 net/core/skbuff.c:651 __netdev_alloc_skb+0xb8/0x3e8 net/core/skbuff.c:715 netdev_alloc_skb include/linux/skbuff.h:3235 [inline] dev_alloc_skb include/linux/skbuff.h:3248 [inline] ppp_async_input drivers/net/ppp/ppp_async.c:863 [inline] ppp_asynctty_receive+0x588/0x186c drivers/net/ppp/ppp_async.c:341 tty_ldisc_receive_buf+0x12c/0x15c drivers/tty/tty_buffer.c:390 tty_port_default_receive_buf+0x74/0xac drivers/tty/tty_port.c:37 receive_buf drivers/tty/tty_buffer.c:444 [inline] flush_to_ldisc+0x284/0x6e4 drivers/tty/tty_buffer.c:494 process_one_work+0x694/0x1204 kernel/workqueue.c:2633 process_scheduled_works kernel/workqueue.c:2706 [inline] worker_thread+0x938/0xef4 kernel/workqueue.c:2787 kthread+0x288/0x310 kernel/kthread.c:388 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-and-tested-by: syzbot+c5da1f087c9e4ec6c933@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240205171004.1059724-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_async.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 840da924708b3..125793d8aefa7 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -460,6 +460,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) case PPPIOCSMRU: if (get_user(val, p)) break; + if (val > U16_MAX) { + err = -EINVAL; + break; + } if (val < PPP_MRU) val = PPP_MRU; ap->mru = val; -- GitLab From 38cc3c6dcc09dc3a1800b5ec22aef643ca11eab8 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Sat, 3 Feb 2024 20:09:27 +0100 Subject: [PATCH 0751/1405] net: stmmac: protect updates of 64-bit statistics counters As explained by a comment in , write side of struct u64_stats_sync must ensure mutual exclusion, or one seqcount update could be lost on 32-bit platforms, thus blocking readers forever. Such lockups have been observed in real world after stmmac_xmit() on one CPU raced with stmmac_napi_poll_tx() on another CPU. To fix the issue without introducing a new lock, split the statics into three parts: 1. fields updated only under the tx queue lock, 2. fields updated only during NAPI poll, 3. fields updated only from interrupt context, Updates to fields in the first two groups are already serialized through other locks. It is sufficient to split the existing struct u64_stats_sync so that each group has its own. Note that tx_set_ic_bit is updated from both contexts. Split this counter so that each context gets its own, and calculate their sum to get the total value in stmmac_get_ethtool_stats(). For the third group, multiple interrupts may be processed by different CPUs at the same time, but interrupts on the same CPU will not nest. Move fields from this group to a newly created per-cpu struct stmmac_pcpu_stats. Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary") Link: https://lore.kernel.org/netdev/Za173PhviYg-1qIn@torres.zugschlus.de/t/ Cc: stable@vger.kernel.org Signed-off-by: Petr Tesarik Reviewed-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 56 +++++--- .../net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 15 +- .../net/ethernet/stmicro/stmmac/dwmac4_lib.c | 15 +- .../net/ethernet/stmicro/stmmac/dwmac_lib.c | 15 +- .../ethernet/stmicro/stmmac/dwxgmac2_dma.c | 15 +- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 129 +++++++++++------ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 133 +++++++++--------- 7 files changed, 221 insertions(+), 157 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index b4f60ab078d67..5ba606a596e77 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -59,28 +59,51 @@ #undef FRAME_FILTER_DEBUG /* #define FRAME_FILTER_DEBUG */ +struct stmmac_q_tx_stats { + u64_stats_t tx_bytes; + u64_stats_t tx_set_ic_bit; + u64_stats_t tx_tso_frames; + u64_stats_t tx_tso_nfrags; +}; + +struct stmmac_napi_tx_stats { + u64_stats_t tx_packets; + u64_stats_t tx_pkt_n; + u64_stats_t poll; + u64_stats_t tx_clean; + u64_stats_t tx_set_ic_bit; +}; + struct stmmac_txq_stats { - u64 tx_bytes; - u64 tx_packets; - u64 tx_pkt_n; - u64 tx_normal_irq_n; - u64 napi_poll; - u64 tx_clean; - u64 tx_set_ic_bit; - u64 tx_tso_frames; - u64 tx_tso_nfrags; - struct u64_stats_sync syncp; + /* Updates protected by tx queue lock. */ + struct u64_stats_sync q_syncp; + struct stmmac_q_tx_stats q; + + /* Updates protected by NAPI poll logic. */ + struct u64_stats_sync napi_syncp; + struct stmmac_napi_tx_stats napi; } ____cacheline_aligned_in_smp; +struct stmmac_napi_rx_stats { + u64_stats_t rx_bytes; + u64_stats_t rx_packets; + u64_stats_t rx_pkt_n; + u64_stats_t poll; +}; + struct stmmac_rxq_stats { - u64 rx_bytes; - u64 rx_packets; - u64 rx_pkt_n; - u64 rx_normal_irq_n; - u64 napi_poll; - struct u64_stats_sync syncp; + /* Updates protected by NAPI poll logic. */ + struct u64_stats_sync napi_syncp; + struct stmmac_napi_rx_stats napi; } ____cacheline_aligned_in_smp; +/* Updates on each CPU protected by not allowing nested irqs. */ +struct stmmac_pcpu_stats { + struct u64_stats_sync syncp; + u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES]; + u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES]; +}; + /* Extra statistic and debug information exposed by ethtool */ struct stmmac_extra_stats { /* Transmit errors */ @@ -205,6 +228,7 @@ struct stmmac_extra_stats { /* per queue statistics */ struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; + struct stmmac_pcpu_stats __percpu *pcpu_stats; unsigned long rx_dropped; unsigned long rx_errors; unsigned long tx_dropped; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 137741b94122e..b21d99faa2d04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; - struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; + struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); int ret = 0; u32 v; @@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_TX_INT) { ret |= handle_tx; - u64_stats_update_begin(&txq_stats->syncp); - txq_stats->tx_normal_irq_n++; - u64_stats_update_end(&txq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); } if (v & EMAC_TX_DMA_STOP_INT) @@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv, if (v & EMAC_RX_INT) { ret |= handle_rx; - u64_stats_update_begin(&rxq_stats->syncp); - rxq_stats->rx_normal_irq_n++; - u64_stats_update_end(&rxq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); } if (v & EMAC_RX_BUF_UA_INT) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 9470d3fd2dede..0d185e54eb7e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs; u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan)); u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan)); - struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; - struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; + struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); int ret = 0; if (dir == DMA_DIR_RX) @@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, } /* TX/RX NORMAL interrupts */ if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - u64_stats_update_begin(&rxq_stats->syncp); - rxq_stats->rx_normal_irq_n++; - u64_stats_update_end(&rxq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_rx; } if (likely(intr_status & DMA_CHAN_STATUS_TI)) { - u64_stats_update_begin(&txq_stats->syncp); - txq_stats->tx_normal_irq_n++; - u64_stats_update_end(&txq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_tx; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 7907d62d34375..85e18f9a22f92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -162,8 +162,7 @@ static void show_rx_process_state(unsigned int status) int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; - struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; + struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); int ret = 0; /* read the status register (CSR5) */ u32 intr_status = readl(ioaddr + DMA_STATUS); @@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr, u32 value = readl(ioaddr + DMA_INTR_ENA); /* to schedule NAPI on real RIE event. */ if (likely(value & DMA_INTR_ENA_RIE)) { - u64_stats_update_begin(&rxq_stats->syncp); - rxq_stats->rx_normal_irq_n++; - u64_stats_update_end(&rxq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_rx; } } if (likely(intr_status & DMA_STATUS_TI)) { - u64_stats_update_begin(&txq_stats->syncp); - txq_stats->tx_normal_irq_n++; - u64_stats_update_end(&txq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_tx; } if (unlikely(intr_status & DMA_STATUS_ERI)) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 3cde695fec91b..dd2ab6185c40e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, struct stmmac_extra_stats *x, u32 chan, u32 dir) { - struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan]; - struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan]; + struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats); u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; @@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, /* TX/RX NORMAL interrupts */ if (likely(intr_status & XGMAC_NIS)) { if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&rxq_stats->syncp); - rxq_stats->rx_normal_irq_n++; - u64_stats_update_end(&rxq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->rx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_rx; } if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&txq_stats->syncp); - txq_stats->tx_normal_irq_n++; - u64_stats_update_end(&txq_stats->syncp); + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->tx_normal_irq_n[chan]); + u64_stats_update_end(&stats->syncp); ret |= handle_tx; } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 42d27b97dd1d0..ec44becf0e2d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -549,44 +549,79 @@ stmmac_set_pauseparam(struct net_device *netdev, } } +static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q) +{ + u64 total; + int cpu; + + total = 0; + for_each_possible_cpu(cpu) { + struct stmmac_pcpu_stats *pcpu; + unsigned int start; + u64 irq_n; + + pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu); + do { + start = u64_stats_fetch_begin(&pcpu->syncp); + irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]); + } while (u64_stats_fetch_retry(&pcpu->syncp, start)); + total += irq_n; + } + return total; +} + +static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q) +{ + u64 total; + int cpu; + + total = 0; + for_each_possible_cpu(cpu) { + struct stmmac_pcpu_stats *pcpu; + unsigned int start; + u64 irq_n; + + pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu); + do { + start = u64_stats_fetch_begin(&pcpu->syncp); + irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]); + } while (u64_stats_fetch_retry(&pcpu->syncp, start)); + total += irq_n; + } + return total; +} + static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data) { u32 tx_cnt = priv->plat->tx_queues_to_use; u32 rx_cnt = priv->plat->rx_queues_to_use; unsigned int start; - int q, stat; - char *p; + int q; for (q = 0; q < tx_cnt; q++) { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q]; - struct stmmac_txq_stats snapshot; + u64 pkt_n; do { - start = u64_stats_fetch_begin(&txq_stats->syncp); - snapshot = *txq_stats; - } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->napi_syncp); + pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n); + } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); - p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n); - for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) { - *data++ = (*(u64 *)p); - p += sizeof(u64); - } + *data++ = pkt_n; + *data++ = stmmac_get_tx_normal_irq_n(priv, q); } for (q = 0; q < rx_cnt; q++) { struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q]; - struct stmmac_rxq_stats snapshot; + u64 pkt_n; do { - start = u64_stats_fetch_begin(&rxq_stats->syncp); - snapshot = *rxq_stats; - } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); + pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n); + } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); - p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n); - for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) { - *data++ = (*(u64 *)p); - p += sizeof(u64); - } + *data++ = pkt_n; + *data++ = stmmac_get_rx_normal_irq_n(priv, q); } } @@ -645,39 +680,49 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, pos = j; for (i = 0; i < rx_queues_count; i++) { struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i]; - struct stmmac_rxq_stats snapshot; + struct stmmac_napi_rx_stats snapshot; + u64 n_irq; j = pos; do { - start = u64_stats_fetch_begin(&rxq_stats->syncp); - snapshot = *rxq_stats; - } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); - - data[j++] += snapshot.rx_pkt_n; - data[j++] += snapshot.rx_normal_irq_n; - normal_irq_n += snapshot.rx_normal_irq_n; - napi_poll += snapshot.napi_poll; + start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); + snapshot = rxq_stats->napi; + } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); + + data[j++] += u64_stats_read(&snapshot.rx_pkt_n); + n_irq = stmmac_get_rx_normal_irq_n(priv, i); + data[j++] += n_irq; + normal_irq_n += n_irq; + napi_poll += u64_stats_read(&snapshot.poll); } pos = j; for (i = 0; i < tx_queues_count; i++) { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i]; - struct stmmac_txq_stats snapshot; + struct stmmac_napi_tx_stats napi_snapshot; + struct stmmac_q_tx_stats q_snapshot; + u64 n_irq; j = pos; do { - start = u64_stats_fetch_begin(&txq_stats->syncp); - snapshot = *txq_stats; - } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); - - data[j++] += snapshot.tx_pkt_n; - data[j++] += snapshot.tx_normal_irq_n; - normal_irq_n += snapshot.tx_normal_irq_n; - data[j++] += snapshot.tx_clean; - data[j++] += snapshot.tx_set_ic_bit; - data[j++] += snapshot.tx_tso_frames; - data[j++] += snapshot.tx_tso_nfrags; - napi_poll += snapshot.napi_poll; + start = u64_stats_fetch_begin(&txq_stats->q_syncp); + q_snapshot = txq_stats->q; + } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start)); + do { + start = u64_stats_fetch_begin(&txq_stats->napi_syncp); + napi_snapshot = txq_stats->napi; + } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); + + data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n); + n_irq = stmmac_get_tx_normal_irq_n(priv, i); + data[j++] += n_irq; + normal_irq_n += n_irq; + data[j++] += u64_stats_read(&napi_snapshot.tx_clean); + data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) + + u64_stats_read(&napi_snapshot.tx_set_ic_bit); + data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames); + data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags); + napi_poll += u64_stats_read(&napi_snapshot.poll); } normal_irq_n += priv->xstats.rx_early_irq; data[j++] = normal_irq_n; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 25519952f754c..75d0297045032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2482,7 +2482,6 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) struct xdp_desc xdp_desc; bool work_done = true; u32 tx_set_ic_bit = 0; - unsigned long flags; /* Avoids TX time-out as we are sharing with slow path */ txq_trans_cond_update(nq); @@ -2566,9 +2565,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size); entry = tx_q->cur_tx; } - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->tx_set_ic_bit += tx_set_ic_bit; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_update_begin(&txq_stats->napi_syncp); + u64_stats_add(&txq_stats->napi.tx_set_ic_bit, tx_set_ic_bit); + u64_stats_update_end(&txq_stats->napi_syncp); if (tx_desc) { stmmac_flush_tx_descriptors(priv, queue); @@ -2616,7 +2615,6 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, unsigned int bytes_compl = 0, pkts_compl = 0; unsigned int entry, xmits = 0, count = 0; u32 tx_packets = 0, tx_errors = 0; - unsigned long flags; __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); @@ -2782,11 +2780,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue, if (tx_q->dirty_tx != tx_q->cur_tx) *pending_packets = true; - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->tx_packets += tx_packets; - txq_stats->tx_pkt_n += tx_packets; - txq_stats->tx_clean++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_update_begin(&txq_stats->napi_syncp); + u64_stats_add(&txq_stats->napi.tx_packets, tx_packets); + u64_stats_add(&txq_stats->napi.tx_pkt_n, tx_packets); + u64_stats_inc(&txq_stats->napi.tx_clean); + u64_stats_update_end(&txq_stats->napi_syncp); priv->xstats.tx_errors += tx_errors; @@ -4213,7 +4211,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; u8 proto_hdr_len, hdr; - unsigned long flags; u32 pay_len, mss; dma_addr_t des; int i; @@ -4378,13 +4375,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->tx_bytes += skb->len; - txq_stats->tx_tso_frames++; - txq_stats->tx_tso_nfrags += nfrags; + u64_stats_update_begin(&txq_stats->q_syncp); + u64_stats_add(&txq_stats->q.tx_bytes, skb->len); + u64_stats_inc(&txq_stats->q.tx_tso_frames); + u64_stats_add(&txq_stats->q.tx_tso_nfrags, nfrags); if (set_ic) - txq_stats->tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_inc(&txq_stats->q.tx_set_ic_bit); + u64_stats_update_end(&txq_stats->q_syncp); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4483,7 +4480,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; int entry, first_tx; - unsigned long flags; dma_addr_t des; tx_q = &priv->dma_conf.tx_queue[queue]; @@ -4653,11 +4649,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue)); } - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->tx_bytes += skb->len; + u64_stats_update_begin(&txq_stats->q_syncp); + u64_stats_add(&txq_stats->q.tx_bytes, skb->len); if (set_ic) - txq_stats->tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_inc(&txq_stats->q.tx_set_ic_bit); + u64_stats_update_end(&txq_stats->q_syncp); if (priv->sarc_type) stmmac_set_desc_sarc(priv, first, priv->sarc_type); @@ -4921,12 +4917,11 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, set_ic = false; if (set_ic) { - unsigned long flags; tx_q->tx_count_frames = 0; stmmac_set_tx_ic(priv, tx_desc); - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->tx_set_ic_bit++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_update_begin(&txq_stats->q_syncp); + u64_stats_inc(&txq_stats->q.tx_set_ic_bit); + u64_stats_update_end(&txq_stats->q_syncp); } stmmac_enable_dma_transmission(priv, priv->ioaddr); @@ -5076,7 +5071,6 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, unsigned int len = xdp->data_end - xdp->data; enum pkt_hash_types hash_type; int coe = priv->hw->rx_csum; - unsigned long flags; struct sk_buff *skb; u32 hash; @@ -5106,10 +5100,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, skb_record_rx_queue(skb, queue); napi_gro_receive(&ch->rxtx_napi, skb); - flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); - rxq_stats->rx_pkt_n++; - rxq_stats->rx_bytes += len; - u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); + u64_stats_update_begin(&rxq_stats->napi_syncp); + u64_stats_inc(&rxq_stats->napi.rx_pkt_n); + u64_stats_add(&rxq_stats->napi.rx_bytes, len); + u64_stats_update_end(&rxq_stats->napi_syncp); } static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) @@ -5191,7 +5185,6 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) unsigned int desc_size; struct bpf_prog *prog; bool failure = false; - unsigned long flags; int xdp_status = 0; int status = 0; @@ -5346,9 +5339,9 @@ static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) stmmac_finalize_xdp_rx(priv, xdp_status); - flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); - rxq_stats->rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); + u64_stats_update_begin(&rxq_stats->napi_syncp); + u64_stats_add(&rxq_stats->napi.rx_pkt_n, count); + u64_stats_update_end(&rxq_stats->napi_syncp); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5386,7 +5379,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int desc_size; struct sk_buff *skb = NULL; struct stmmac_xdp_buff ctx; - unsigned long flags; int xdp_status = 0; int buf_sz; @@ -5646,11 +5638,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_rx_refill(priv, queue); - flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); - rxq_stats->rx_packets += rx_packets; - rxq_stats->rx_bytes += rx_bytes; - rxq_stats->rx_pkt_n += count; - u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); + u64_stats_update_begin(&rxq_stats->napi_syncp); + u64_stats_add(&rxq_stats->napi.rx_packets, rx_packets); + u64_stats_add(&rxq_stats->napi.rx_bytes, rx_bytes); + u64_stats_add(&rxq_stats->napi.rx_pkt_n, count); + u64_stats_update_end(&rxq_stats->napi_syncp); priv->xstats.rx_dropped += rx_dropped; priv->xstats.rx_errors += rx_errors; @@ -5665,13 +5657,12 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget) struct stmmac_priv *priv = ch->priv_data; struct stmmac_rxq_stats *rxq_stats; u32 chan = ch->index; - unsigned long flags; int work_done; rxq_stats = &priv->xstats.rxq_stats[chan]; - flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); - rxq_stats->napi_poll++; - u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); + u64_stats_update_begin(&rxq_stats->napi_syncp); + u64_stats_inc(&rxq_stats->napi.poll); + u64_stats_update_end(&rxq_stats->napi_syncp); work_done = stmmac_rx(priv, budget, chan); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -5693,13 +5684,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) struct stmmac_txq_stats *txq_stats; bool pending_packets = false; u32 chan = ch->index; - unsigned long flags; int work_done; txq_stats = &priv->xstats.txq_stats[chan]; - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->napi_poll++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_update_begin(&txq_stats->napi_syncp); + u64_stats_inc(&txq_stats->napi.poll); + u64_stats_update_end(&txq_stats->napi_syncp); work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets); work_done = min(work_done, budget); @@ -5729,17 +5719,16 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget) struct stmmac_rxq_stats *rxq_stats; struct stmmac_txq_stats *txq_stats; u32 chan = ch->index; - unsigned long flags; rxq_stats = &priv->xstats.rxq_stats[chan]; - flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp); - rxq_stats->napi_poll++; - u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags); + u64_stats_update_begin(&rxq_stats->napi_syncp); + u64_stats_inc(&rxq_stats->napi.poll); + u64_stats_update_end(&rxq_stats->napi_syncp); txq_stats = &priv->xstats.txq_stats[chan]; - flags = u64_stats_update_begin_irqsave(&txq_stats->syncp); - txq_stats->napi_poll++; - u64_stats_update_end_irqrestore(&txq_stats->syncp, flags); + u64_stats_update_begin(&txq_stats->napi_syncp); + u64_stats_inc(&txq_stats->napi.poll); + u64_stats_update_end(&txq_stats->napi_syncp); tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets); tx_done = min(tx_done, budget); @@ -7065,10 +7054,13 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 u64 tx_bytes; do { - start = u64_stats_fetch_begin(&txq_stats->syncp); - tx_packets = txq_stats->tx_packets; - tx_bytes = txq_stats->tx_bytes; - } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + start = u64_stats_fetch_begin(&txq_stats->q_syncp); + tx_bytes = u64_stats_read(&txq_stats->q.tx_bytes); + } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start)); + do { + start = u64_stats_fetch_begin(&txq_stats->napi_syncp); + tx_packets = u64_stats_read(&txq_stats->napi.tx_packets); + } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start)); stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; @@ -7080,10 +7072,10 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64 u64 rx_bytes; do { - start = u64_stats_fetch_begin(&rxq_stats->syncp); - rx_packets = rxq_stats->rx_packets; - rx_bytes = rxq_stats->rx_bytes; - } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + start = u64_stats_fetch_begin(&rxq_stats->napi_syncp); + rx_packets = u64_stats_read(&rxq_stats->napi.rx_packets); + rx_bytes = u64_stats_read(&rxq_stats->napi.rx_bytes); + } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; @@ -7477,9 +7469,16 @@ int stmmac_dvr_probe(struct device *device, priv->dev = ndev; for (i = 0; i < MTL_MAX_RX_QUEUES; i++) - u64_stats_init(&priv->xstats.rxq_stats[i].syncp); - for (i = 0; i < MTL_MAX_TX_QUEUES; i++) - u64_stats_init(&priv->xstats.txq_stats[i].syncp); + u64_stats_init(&priv->xstats.rxq_stats[i].napi_syncp); + for (i = 0; i < MTL_MAX_TX_QUEUES; i++) { + u64_stats_init(&priv->xstats.txq_stats[i].q_syncp); + u64_stats_init(&priv->xstats.txq_stats[i].napi_syncp); + } + + priv->xstats.pcpu_stats = + devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats); + if (!priv->xstats.pcpu_stats) + return -ENOMEM; stmmac_set_ethtool_ops(ndev); priv->pause = pause; -- GitLab From d1722057477a3786b8c0d60c28fc281f6ecf1cc3 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 6 Feb 2024 11:38:49 +0000 Subject: [PATCH 0752/1405] ASoC: cs42l43: Handle error from devm_pm_runtime_enable As devm_pm_runtime_enable can fail due to memory allocations, it is best to handle the error. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20240206113850.719888-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 6a64681767de8..4d2a5584aa576 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2257,7 +2257,10 @@ static int cs42l43_codec_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(priv->dev); pm_runtime_set_active(priv->dev); pm_runtime_get_noresume(priv->dev); - devm_pm_runtime_enable(priv->dev); + + ret = devm_pm_runtime_enable(priv->dev); + if (ret) + goto err_pm; for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) { ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name, -- GitLab From 64353af49fecbdec1de9aadf2369d54fc00f1899 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 6 Feb 2024 11:38:50 +0000 Subject: [PATCH 0753/1405] ASoC: cs42l43: Add system suspend ops to disable IRQ The IRQ should be disabled whilst entering and exiting system suspend to avoid the IRQ handler being called whilst the PM runtime is disabled. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20240206113850.719888-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43.c | 43 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 4d2a5584aa576..a97ccb512deba 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2336,8 +2336,47 @@ static int cs42l43_codec_runtime_resume(struct device *dev) return 0; } -static DEFINE_RUNTIME_DEV_PM_OPS(cs42l43_codec_pm_ops, NULL, - cs42l43_codec_runtime_resume, NULL); +static int cs42l43_codec_suspend(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + disable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_suspend_noirq(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + enable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_resume(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + enable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_resume_noirq(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + disable_irq(cs42l43->irq); + + return 0; +} + +static const struct dev_pm_ops cs42l43_codec_pm_ops = { + SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend, cs42l43_codec_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend_noirq, cs42l43_codec_resume_noirq) + RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) +}; static const struct platform_device_id cs42l43_codec_id_table[] = { { "cs42l43-codec", }, -- GitLab From 7011b51f13b391ee06708bb1f82653a2953f8cfc Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer Date: Tue, 6 Feb 2024 10:10:05 -0500 Subject: [PATCH 0754/1405] regmap: kunit: fix raw noinc write test wrapping The raw noinc write test places a known value in the register following the noinc register to verify that it is not disturbed by the noinc write. This test ensures this value is distinct by adding 100 to the second element of the noinc write data. The regmap registers are 16-bit, while the test value is stored in an unsigned int. Therefore, adding 100 may cause the register to wrap while the test value does not, causing the test to fail. This patch fixes this by changing val_test and val_last from unsigned int to u16. Signed-off-by: Ben Wolsieffer Reported-by: Guenter Roeck Closes: https://lore.kernel.org/linux-kernel/745d3a11-15bc-48b6-84c8-c8761c943bed@roeck-us.net/T/ Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240206151004.1636761-2-ben.wolsieffer@hefring.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-kunit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c index 026bdcb45127f..4eb18f5d32655 100644 --- a/drivers/base/regmap/regmap-kunit.c +++ b/drivers/base/regmap/regmap-kunit.c @@ -1202,7 +1202,8 @@ static void raw_noinc_write(struct kunit *test) struct regmap *map; struct regmap_config config; struct regmap_ram_data *data; - unsigned int val, val_test, val_last; + unsigned int val; + u16 val_test, val_last; u16 val_array[BLOCK_TEST_SIZE]; config = raw_regmap_config; -- GitLab From 4b00d0c513da58b68df015968721b11396fe4ab3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 4 Feb 2024 08:56:18 -0800 Subject: [PATCH 0755/1405] selftests: cmsg_ipv6: repeat the exact packet cmsg_ipv6 test requests tcpdump to capture 4 packets, and sends until tcpdump quits. Only the first packet is "real", however, and the rest are basic UDP packets. So if tcpdump doesn't start in time it will miss the real packet and only capture the UDP ones. This makes the test fail on slow machine (no KVM or with debug enabled) 100% of the time, while it passes in fast environments. Repeat the "real" / expected packet. Fixes: 9657ad09e1fa ("selftests: net: test IPV6_TCLASS") Fixes: 05ae83d5a4a2 ("selftests: net: test IPV6_HOPLIMIT") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/cmsg_ipv6.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index f30bd57d5e387..8bc23fb4c82b7 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -89,7 +89,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "TCLASS $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null @@ -126,7 +126,7 @@ for ovr in setsock cmsg both diff; do check_result $? 0 "HOPLIMIT $prot $ovr - pass" while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p u $TGT6 1234 + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 done tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null -- GitLab From cd7d469c25704d414d71bf3644f163fb74e7996b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Oct 2023 13:55:44 +0800 Subject: [PATCH 0756/1405] libceph: fail sparse-read if the data length doesn't match Once this happens that means there have bugs. Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- net/ceph/osd_client.c | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fa018d5864e74..f66f6aac74f6f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -45,6 +45,7 @@ enum ceph_sparse_read_state { CEPH_SPARSE_READ_HDR = 0, CEPH_SPARSE_READ_EXTENTS, CEPH_SPARSE_READ_DATA_LEN, + CEPH_SPARSE_READ_DATA_PRE, CEPH_SPARSE_READ_DATA, }; @@ -64,7 +65,7 @@ struct ceph_sparse_read { u64 sr_req_len; /* orig request length */ u64 sr_pos; /* current pos in buffer */ int sr_index; /* current extent index */ - __le32 sr_datalen; /* length of actual data */ + u32 sr_datalen; /* length of actual data */ u32 sr_count; /* extent count in reply */ int sr_ext_len; /* length of extent array */ struct ceph_sparse_extent *sr_extent; /* extent array */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 625622016f576..2cea35e4ff8ef 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5857,8 +5857,8 @@ static int osd_sparse_read(struct ceph_connection *con, struct ceph_osd *o = con->private; struct ceph_sparse_read *sr = &o->o_sparse_read; u32 count = sr->sr_count; - u64 eoff, elen; - int ret; + u64 eoff, elen, len = 0; + int i, ret; switch (sr->sr_state) { case CEPH_SPARSE_READ_HDR: @@ -5903,8 +5903,20 @@ static int osd_sparse_read(struct ceph_connection *con, convert_extent_map(sr); ret = sizeof(sr->sr_datalen); *pbuf = (char *)&sr->sr_datalen; - sr->sr_state = CEPH_SPARSE_READ_DATA; + sr->sr_state = CEPH_SPARSE_READ_DATA_PRE; break; + case CEPH_SPARSE_READ_DATA_PRE: + /* Convert sr_datalen to host-endian */ + sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen); + for (i = 0; i < count; i++) + len += sr->sr_extent[i].len; + if (sr->sr_datalen != len) { + pr_warn_ratelimited("data len %u != extent len %llu\n", + sr->sr_datalen, len); + return -EREMOTEIO; + } + sr->sr_state = CEPH_SPARSE_READ_DATA; + fallthrough; case CEPH_SPARSE_READ_DATA: if (sr->sr_index >= count) { sr->sr_state = CEPH_SPARSE_READ_HDR; -- GitLab From ee97302fbc0c98a25732d736fc73aaf4d62c4128 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Dec 2023 09:21:15 +0800 Subject: [PATCH 0757/1405] libceph: rename read_sparse_msg_*() to read_partial_sparse_msg_*() These functions are supposed to behave like other read_partial_*() handlers: the contract with messenger v1 is that the handler bails if the area of the message it's responsible for is already processed. This comes up when handling short reads from the socket. [ idryomov: changelog ] Signed-off-by: Xiubo Li Acked-by: Jeff Layton Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v1.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index f9a50d7f0d204..4cb60bacf5f5b 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -991,7 +991,7 @@ static inline int read_partial_message_section(struct ceph_connection *con, return read_partial_message_chunk(con, section, sec_len, crc); } -static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) +static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); @@ -1026,7 +1026,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) return 1; } -static int read_sparse_msg_data(struct ceph_connection *con) +static int read_partial_sparse_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); @@ -1043,7 +1043,7 @@ static int read_sparse_msg_data(struct ceph_connection *con) con->v1.in_sr_len, &crc); else if (cursor->sr_resid > 0) - ret = read_sparse_msg_extent(con, &crc); + ret = read_partial_sparse_msg_extent(con, &crc); if (ret <= 0) { if (do_datacrc) @@ -1254,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con) return -EIO; if (m->sparse_read) - ret = read_sparse_msg_data(con); + ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else -- GitLab From 8e46a2d068c92a905d01cbb018b00d66991585ab Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Dec 2023 16:01:03 +0800 Subject: [PATCH 0758/1405] libceph: just wait for more data to be available on the socket A short read may occur while reading the message footer from the socket. Later, when the socket is ready for another read, the messenger invokes all read_partial_*() handlers, including read_partial_sparse_msg_data(). The expectation is that read_partial_sparse_msg_data() would bail, allowing the messenger to invoke read_partial() for the footer and pick up where it left off. However read_partial_sparse_msg_data() violates that and ends up calling into the state machine in the OSD client. The sparse-read state machine assumes that it's a new op and interprets some piece of the footer as the sparse-read header and returns bogus extents/data length, etc. To determine whether read_partial_sparse_msg_data() should bail, let's reuse cursor->total_resid. Because once it reaches to zero that means all the extents and data have been successfully received in last read, else it could break out when partially reading any of the extents and data. And then osd_sparse_read() could continue where it left off. [ idryomov: changelog ] Link: https://tracker.ceph.com/issues/63586 Fixes: d396f89db39a ("libceph: add sparse read support to msgr1") Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger_v1.c | 25 +++++++++++++------------ net/ceph/messenger_v2.c | 4 ++-- net/ceph/osd_client.c | 9 +++------ 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2eaaabbe98cb6..1717cc57cdacd 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -283,7 +283,7 @@ struct ceph_msg { struct kref kref; bool more_to_follow; bool needs_out_seq; - bool sparse_read; + u64 sparse_read_total; int front_alloc_len; struct ceph_msgpool *pool; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 4cb60bacf5f5b..0cb61c76b9b87 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ - if (!msg->sparse_read) - ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); + u64 len = msg->sparse_read_total ? : data_len; + + ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* @@ -1036,7 +1037,7 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) if (do_datacrc) crc = con->in_data_crc; - do { + while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, @@ -1044,23 +1045,23 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) &crc); else if (cursor->sr_resid > 0) ret = read_partial_sparse_msg_extent(con, &crc); - - if (ret <= 0) { - if (do_datacrc) - con->in_data_crc = crc; - return ret; - } + if (ret <= 0) + break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); + if (ret <= 0) { + ret = ret ? ret : 1; /* must return > 0 to indicate success */ + break; + } con->v1.in_sr_len = ret; - } while (ret > 0); + } if (do_datacrc) con->in_data_crc = crc; - return ret < 0 ? ret : 1; /* must return > 0 to indicate success */ + return ret; } static int read_partial_msg_data(struct ceph_connection *con) @@ -1253,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con) if (!m->num_data_items) return -EIO; - if (m->sparse_read) + if (m->sparse_read_total) ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index f8ec60e1aba3a..a0ca5414b333d 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con) struct sg_table enc_sgt = {}; struct sg_table sgt = {}; struct page **pages = NULL; - bool sparse = con->in_msg->sparse_read; + bool sparse = !!con->in_msg->sparse_read_total; int dpos = 0; int tail_len; int ret; @@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con) } if (data_len(msg)) { - if (msg->sparse_read) + if (msg->sparse_read_total) con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; else con->v2.in_state = IN_S_PREPARE_READ_DATA; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2cea35e4ff8ef..9d078b37fe0b9 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m = ceph_msg_get(req->r_reply); - m->sparse_read = (bool)srlen; + m->sparse_read_total = srlen; dout("get_reply tid %lld %p\n", tid, m); @@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con, } if (o->o_sparse_op_idx < 0) { - u64 srlen = sparse_data_requested(req); - - dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", - __func__, o->o_osd, srlen); - ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); + dout("%s: [%d] starting new sparse read req\n", + __func__, o->o_osd); } else { u64 end; -- GitLab From bbb20ea993f46743f7429092ddc52f1a5c5428ef Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 18 Jan 2024 14:24:41 +0800 Subject: [PATCH 0759/1405] ceph: always set initial i_blkbits to CEPH_FSCRYPT_BLOCK_SHIFT The fscrypt code will use i_blkbits to setup ci_data_unit_bits when allocating the new inode, but ceph will initiate i_blkbits ater when filling the inode, which is too late. Since ci_data_unit_bits will only be used by the fscrypt framework so initiating i_blkbits with CEPH_FSCRYPT_BLOCK_SHIFT is safe. Link: https://tracker.ceph.com/issues/64035 Fixes: 5b1188847180 ("fscrypt: support crypto data unit size less than filesystem block size") Signed-off-by: Xiubo Li Reviewed-by: Eric Biggers Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0c25d326afc41..7b2e77517f235 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -78,6 +78,8 @@ struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-ENOMEM); + inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT; + if (!S_ISLNK(*mode)) { err = ceph_pre_init_acls(dir, mode, as_ctx); if (err < 0) -- GitLab From cda4672da1c26835dcbd7aec2bfed954eda9b5ef Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Thu, 1 Feb 2024 17:07:16 +0530 Subject: [PATCH 0760/1405] ceph: prevent use-after-free in encode_cap_msg() In fs/ceph/caps.c, in encode_cap_msg(), "use after free" error was caught by KASAN at this line - 'ceph_buffer_get(arg->xattr_buf);'. This implies before the refcount could be increment here, it was freed. In same file, in "handle_cap_grant()" refcount is decremented by this line - 'ceph_buffer_put(ci->i_xattrs.blob);'. It appears that a race occurred and resource was freed by the latter line before the former line could increment it. encode_cap_msg() is called by __send_cap() and __send_cap() is called by ceph_check_caps() after calling __prep_cap(). __prep_cap() is where arg->xattr_buf is assigned to ci->i_xattrs.blob. This is the spot where the refcount must be increased to prevent "use after free" error. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/59259 Signed-off-by: Rishabh Dave Reviewed-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9c02f328c966c..e8bf082105d87 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1452,7 +1452,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap, if (flushing & CEPH_CAP_XATTR_EXCL) { arg->old_xattr_buf = __ceph_build_xattrs_blob(ci); arg->xattr_version = ci->i_xattrs.version; - arg->xattr_buf = ci->i_xattrs.blob; + arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob); } else { arg->xattr_buf = NULL; arg->old_xattr_buf = NULL; @@ -1553,6 +1553,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci) encode_cap_msg(msg, arg); ceph_con_send(&arg->session->s_con, msg); ceph_buffer_put(arg->old_xattr_buf); + ceph_buffer_put(arg->xattr_buf); if (arg->wake) wake_up_all(&ci->i_cap_wq); } -- GitLab From 07045648c07c5632e0dfd5ce084d3cd0cec0258a Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 4 Jan 2024 09:21:30 +0800 Subject: [PATCH 0761/1405] ceph: always check dir caps asynchronously The MDS will issue the 'Fr' caps for async dirop, while there is buggy in kclient and it could miss releasing the async dirop caps, which is 'Fsxr'. And then the MDS will complain with: "[WRN] client.xxx isn't responding to mclientcaps(revoke) ..." So when releasing the dirop async requests or when they fail we should always make sure that being revoked caps could be released. Link: https://tracker.ceph.com/issues/50223 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 6 ------ fs/ceph/mds_client.c | 9 ++++----- fs/ceph/mds_client.h | 2 +- fs/ceph/super.h | 2 -- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e8bf082105d87..ad1f46c66fbff 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3216,7 +3216,6 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci, enum put_cap_refs_mode { PUT_CAP_REFS_SYNC = 0, - PUT_CAP_REFS_NO_CHECK, PUT_CAP_REFS_ASYNC, }; @@ -3332,11 +3331,6 @@ void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had) __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_ASYNC); } -void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had) -{ - __ceph_put_cap_refs(ci, had, PUT_CAP_REFS_NO_CHECK); -} - /* * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap * context. Adjust per-snap dirty page accounting as appropriate. diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 548d1de379f35..f71bb9c9569fc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1089,7 +1089,7 @@ void ceph_mdsc_release_request(struct kref *kref) struct ceph_mds_request *req = container_of(kref, struct ceph_mds_request, r_kref); - ceph_mdsc_release_dir_caps_no_check(req); + ceph_mdsc_release_dir_caps_async(req); destroy_reply_info(&req->r_reply_info); if (req->r_request) ceph_msg_put(req->r_request); @@ -4261,7 +4261,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) } } -void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) +void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req) { struct ceph_client *cl = req->r_mdsc->fsc->client; int dcaps; @@ -4269,8 +4269,7 @@ void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) dcaps = xchg(&req->r_dir_caps, 0); if (dcaps) { doutc(cl, "releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); - ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), - dcaps); + ceph_put_cap_refs_async(ceph_inode(req->r_parent), dcaps); } } @@ -4306,7 +4305,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, if (req->r_session->s_mds != session->s_mds) continue; - ceph_mdsc_release_dir_caps_no_check(req); + ceph_mdsc_release_dir_caps_async(req); __send_request(session, req, true); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 2e6ddaa13d725..40560af388272 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -552,7 +552,7 @@ extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req); extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req); -extern void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req); +extern void ceph_mdsc_release_dir_caps_async(struct ceph_mds_request *req); static inline void ceph_mdsc_get_request(struct ceph_mds_request *req) { kref_get(&req->r_kref); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b06e2bc86221b..b63b4cd9b5b68 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1255,8 +1255,6 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps, extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_cap_refs_async(struct ceph_inode_info *ci, int had); -extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, - int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void __ceph_remove_capsnap(struct inode *inode, -- GitLab From 44d3b8a19b91cd2af11f918b2fd05628383172de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Wed, 7 Feb 2024 12:26:24 +0100 Subject: [PATCH 0762/1405] ASoC: Intel: avs: Fix dynamic port assignment when TDM is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case TDM is set in topology on SSP0, parser will overwrite vindex value, because it only checks if port is set. Fix this by checking whole field value. Fixes: e6d50e474e45 ("ASoC: Intel: avs: Improve topology parsing of dynamic strings") Reviewed-by: Cezary Rojewski Signed-off-by: Amadeusz Sławiński Link: https://lore.kernel.org/r/20240207112624.2132821-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 778236d3fd280..48b3c67c91032 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -857,7 +857,7 @@ assign_copier_gtw_instance(struct snd_soc_component *comp, struct avs_tplg_modcf } /* If topology sets value don't overwrite it */ - if (cfg->copier.vindex.i2s.instance) + if (cfg->copier.vindex.val) return; mach = dev_get_platdata(comp->card->dev); -- GitLab From 5001bfe927b594470f3a2484cb410b8b42a47903 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 5 Feb 2024 12:03:43 -0700 Subject: [PATCH 0763/1405] MAINTAINERS: Maintainer change for rds At this point, Santosh has moved onto other things and I am happy to take over the role of rds maintainer. Update the MAINTAINERS accordingly. Signed-off-by: Allison Henderson Link: https://lore.kernel.org/r/20240205190343.112436-1-allison.henderson@oracle.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 722b894f305ed..e3e670ee56108 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18433,7 +18433,7 @@ S: Supported F: drivers/infiniband/sw/rdmavt RDS - RELIABLE DATAGRAM SOCKETS -M: Santosh Shilimkar +M: Allison Henderson L: netdev@vger.kernel.org L: linux-rdma@vger.kernel.org L: rds-devel@oss.oracle.com (moderated for non-subscribers) -- GitLab From 75428f537d7cae33c7e4dd726144074f78622c09 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 5 Feb 2024 18:29:06 -0800 Subject: [PATCH 0764/1405] net: intel: fix old compiler regressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel build regressions/improvements email contained a couple of issues with old compilers (in fact all the reports were on different architectures, but all gcc 5.5) and the FIELD_PREP() and FIELD_GET() conversions. They're all because an integer #define that should have been declared as unsigned, was shifted to the point that it could set the sign bit. The fix just involves making sure the defines use the "U" identifier on the constants to make sure they're unsigned. Should make the checkers happier. Confirmed with objdump before/after that there is no change to the binaries. Issues were reported as follows: ./drivers/net/ethernet/intel/ice/ice_base.c:238:7: note: in expansion of macro 'FIELD_GET' (FIELD_GET(GLINT_CTL_ITR_GRAN_25_M, regval) == ICE_ITR_GRAN_US)) ^ ./include/linux/compiler_types.h:435:38: error: call to '__compiletime_assert_1093' declared with attribute error: FIELD_GET: mask is not constant drivers/net/ethernet/intel/ice/ice_nvm.c:709:16: note: in expansion of macro ‘FIELD_GET’ orom->major = FIELD_GET(ICE_OROM_VER_MASK, combo_ver); ^ ./include/linux/compiler_types.h:435:38: error: call to ‘__compiletime_assert_796’ declared with attribute error: FIELD_GET: mask is not constant drivers/net/ethernet/intel/ice/ice_common.c:945:18: note: in expansion of macro ‘FIELD_GET’ u8 max_agg_bw = FIELD_GET(GL_PWR_MODE_CTL_CAR_MAX_BW_M, ^ ./include/linux/compiler_types.h:435:38: error: call to ‘__compiletime_assert_420’ declared with attribute error: FIELD_GET: mask is not constant drivers/net/ethernet/intel/i40e/i40e_dcb.c:458:8: note: in expansion of macro ‘FIELD_GET’ oui = FIELD_GET(I40E_LLDP_TLV_OUI_MASK, ouisubtype); ^ Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/lkml/d03e90ca-8485-4d1b-5ec1-c3398e0e8da@linux-m68k.org/ #i40e #ice Fixes: 62589808d73b ("i40e: field get conversion") Fixes: 5a259f8e0baf ("ice: field get conversion") Signed-off-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20240206022906.2194214-1-jesse.brandeburg@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_dcb.h | 2 +- drivers/net/ethernet/intel/ice/ice_osdep.h | 2 +- drivers/net/ethernet/intel/ice/ice_type.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h index 6b60dc9b77361..d76497566e40e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h @@ -43,7 +43,7 @@ #define I40E_LLDP_TLV_SUBTYPE_SHIFT 0 #define I40E_LLDP_TLV_SUBTYPE_MASK (0xFF << I40E_LLDP_TLV_SUBTYPE_SHIFT) #define I40E_LLDP_TLV_OUI_SHIFT 8 -#define I40E_LLDP_TLV_OUI_MASK (0xFFFFFF << I40E_LLDP_TLV_OUI_SHIFT) +#define I40E_LLDP_TLV_OUI_MASK (0xFFFFFFU << I40E_LLDP_TLV_OUI_SHIFT) /* Defines for IEEE ETS TLV */ #define I40E_IEEE_ETS_MAXTC_SHIFT 0 diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h index 82bc54fec7f36..a2562f04267f2 100644 --- a/drivers/net/ethernet/intel/ice/ice_osdep.h +++ b/drivers/net/ethernet/intel/ice/ice_osdep.h @@ -24,7 +24,7 @@ #define rd64(a, reg) readq((a)->hw_addr + (reg)) #define ice_flush(a) rd32((a), GLGEN_STAT) -#define ICE_M(m, s) ((m) << (s)) +#define ICE_M(m, s) ((m ## U) << (s)) struct ice_dma_mem { void *va; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 41ab6d7bbd9ef..a508e917ce5ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -1072,7 +1072,7 @@ struct ice_aq_get_set_rss_lut_params { #define ICE_OROM_VER_BUILD_SHIFT 8 #define ICE_OROM_VER_BUILD_MASK (0xffff << ICE_OROM_VER_BUILD_SHIFT) #define ICE_OROM_VER_SHIFT 24 -#define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT) +#define ICE_OROM_VER_MASK (0xffU << ICE_OROM_VER_SHIFT) #define ICE_SR_PFA_PTR 0x40 #define ICE_SR_1ST_NVM_BANK_PTR 0x42 #define ICE_SR_NVM_BANK_SIZE 0x43 -- GitLab From ce68c035457bdd025a9961e0ba2157323090c581 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 30 Jan 2024 13:01:14 +0100 Subject: [PATCH 0765/1405] riscv: Fix arch_hugetlb_migration_supported() for NAPOT arch_hugetlb_migration_supported() must be reimplemented to add support for NAPOT hugepages, which is done here. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240130120114.106003-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 3 +++ arch/riscv/mm/hugetlbpage.c | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 4c5b0e929890f..20f9c3ba23414 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -11,6 +11,9 @@ static inline void arch_clear_hugepage_flags(struct page *page) } #define arch_clear_hugepage_flags arch_clear_hugepage_flags +bool arch_hugetlb_migration_supported(struct hstate *h); +#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported + #ifdef CONFIG_RISCV_ISA_SVNAPOT #define __HAVE_ARCH_HUGE_PTE_CLEAR void huge_pte_clear(struct mm_struct *mm, unsigned long addr, diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 87406b26c3dad..29c7606414d27 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -364,7 +364,7 @@ void huge_pte_clear(struct mm_struct *mm, pte_clear(mm, addr, ptep); } -static __init bool is_napot_size(unsigned long size) +static bool is_napot_size(unsigned long size) { unsigned long order; @@ -392,7 +392,7 @@ arch_initcall(napot_hugetlbpages_init); #else -static __init bool is_napot_size(unsigned long size) +static bool is_napot_size(unsigned long size) { return false; } @@ -409,7 +409,7 @@ int pmd_huge(pmd_t pmd) return pmd_leaf(pmd); } -bool __init arch_hugetlb_valid_size(unsigned long size) +static bool __hugetlb_valid_size(unsigned long size) { if (size == HPAGE_SIZE) return true; @@ -421,6 +421,16 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return false; } +bool __init arch_hugetlb_valid_size(unsigned long size) +{ + return __hugetlb_valid_size(size); +} + +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + return __hugetlb_valid_size(huge_page_size(h)); +} + #ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) { -- GitLab From 2cf963787529f615f7c93bdcf13a5e82029e7f38 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 23 Nov 2023 13:42:14 +0000 Subject: [PATCH 0766/1405] riscv: declare overflow_stack as exported from traps.c The percpu area overflow_stacks is exported from arch/riscv/kernel/traps.c for use in the entry code, but is not declared anywhere. Add the relevant declaration to arch/riscv/include/asm/stacktrace.h to silence the following sparse warning: arch/riscv/kernel/traps.c:395:1: warning: symbol '__pcpu_scope_overflow_stack' was not declared. Should it be static? We don't add the stackinfo_get_overflow() call as for some of the other architectures as this doesn't seem to be used yet, so just silence the warning. Signed-off-by: Ben Dooks Reviewed-by: Conor Dooley Fixes: be97d0db5f44 ("riscv: VMAP_STACK overflow detection thread-safe") Link: https://lore.kernel.org/r/20231123134214.81481-1-ben.dooks@codethink.co.uk Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/stacktrace.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h index f7e8ef2418b99..b1495a7e06ce6 100644 --- a/arch/riscv/include/asm/stacktrace.h +++ b/arch/riscv/include/asm/stacktrace.h @@ -21,4 +21,9 @@ static inline bool on_thread_stack(void) return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); } + +#ifdef CONFIG_VMAP_STACK +DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); +#endif /* CONFIG_VMAP_STACK */ + #endif /* _ASM_RISCV_STACKTRACE_H */ -- GitLab From 3951f6add519a8e954bf78691a412f65b24f4715 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 30 Jan 2024 12:55:08 +0100 Subject: [PATCH 0767/1405] riscv: Fix arch_tlbbatch_flush() by clearing the batch cpumask We must clear the cpumask once we have flushed the batch, otherwise cpus get accumulated and we end sending IPIs to more cpus than needed. Fixes: 54d7431af73e ("riscv: Add support for BATCHED_UNMAP_TLB_FLUSH") Signed-off-by: Alexandre Ghiti Reviewed-by: Charlie Jenkins Reviewed-by: Jisheng Zhang Link: https://lore.kernel.org/r/20240130115508.105386-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/tlbflush.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index dffb0e5bd9425..893566e004b73 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -234,4 +234,5 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) { __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); + cpumask_clear(&batch->cpumask); } -- GitLab From 1f4137e882c621f8ed4bd4da9b10cf91d059e52a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 6 Feb 2024 09:47:21 -0800 Subject: [PATCH 0768/1405] nvme: move passthrough logging attribute to head The namespace does not have attributes, but the head does. Move the new logging attribute to that structure instead of dereferencing the wrong type. And while we're here, fix the reverse-tree coding style. Fixes: 9f079dda14339e ("nvme: allow passthru cmd error logging") Reported-by: Tasmiya Nalatwad Tested-by: Tasmiya Nalatwad Reviewed-by: Chaitanya Kulkarni Reviewed-by: Alan Adamson Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 +-- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/host/sysfs.c | 30 +++++++++++++++--------------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 975245527c1fc..246e92c05aac4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -713,7 +713,7 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd) if (req->q->queuedata) { struct nvme_ns *ns = req->q->disk->private_data; - logging_enabled = ns->passthru_err_log_enabled; + logging_enabled = ns->head->passthru_err_log_enabled; req->timeout = NVME_IO_TIMEOUT; } else { /* no queuedata implies admin queue */ logging_enabled = nr->ctrl->passthru_err_log_enabled; @@ -3696,7 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) ns->disk = disk; ns->queue = disk->queue; - ns->passthru_err_log_enabled = false; if (ctrl->opts && ctrl->opts->data_digest) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 3897334e3950d..7b87763e2f8a6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -455,6 +455,7 @@ struct nvme_ns_head { struct list_head entry; struct kref ref; bool shared; + bool passthru_err_log_enabled; int instance; struct nvme_effects_log *effects; u64 nuse; @@ -523,7 +524,6 @@ struct nvme_ns { struct device cdev_device; struct nvme_fault_inject fault_inject; - bool passthru_err_log_enabled; }; /* NVMe ns supports metadata actions by the controller (generate/strip) */ diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index d099218e494a8..f2832f70e7e0a 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -48,8 +48,8 @@ static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nvme_ctrl *ctrl = dev_get_drvdata(dev); - int err; bool passthru_err_log_enabled; + int err; err = kstrtobool(buf, &passthru_err_log_enabled); if (err) @@ -60,25 +60,34 @@ static ssize_t nvme_adm_passthru_err_log_enabled_store(struct device *dev, return count; } +static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) +{ + struct gendisk *disk = dev_to_disk(dev); + + if (nvme_disk_is_ns_head(disk)) + return disk->private_data; + return nvme_get_ns_from_dev(dev)->head; +} + static ssize_t nvme_io_passthru_err_log_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nvme_ns *n = dev_get_drvdata(dev); + struct nvme_ns_head *head = dev_to_ns_head(dev); - return sysfs_emit(buf, n->passthru_err_log_enabled ? "on\n" : "off\n"); + return sysfs_emit(buf, head->passthru_err_log_enabled ? "on\n" : "off\n"); } static ssize_t nvme_io_passthru_err_log_enabled_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct nvme_ns *ns = dev_get_drvdata(dev); - int err; + struct nvme_ns_head *head = dev_to_ns_head(dev); bool passthru_err_log_enabled; + int err; err = kstrtobool(buf, &passthru_err_log_enabled); if (err) return -EINVAL; - ns->passthru_err_log_enabled = passthru_err_log_enabled; + head->passthru_err_log_enabled = passthru_err_log_enabled; return count; } @@ -91,15 +100,6 @@ static struct device_attribute dev_attr_io_passthru_err_log_enabled = \ __ATTR(passthru_err_log_enabled, S_IRUGO | S_IWUSR, \ nvme_io_passthru_err_log_enabled_show, nvme_io_passthru_err_log_enabled_store); -static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) -{ - struct gendisk *disk = dev_to_disk(dev); - - if (nvme_disk_is_ns_head(disk)) - return disk->private_data; - return nvme_get_ns_from_dev(dev)->head; -} - static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, char *buf) { -- GitLab From e8c263ed6de8183e670fbd127c2512292a2ad8eb Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 5 Feb 2024 16:30:21 -0800 Subject: [PATCH 0769/1405] nvme-core: fix comment to reflect right functions The functions and the attribute listed in the comment doesn't exists in the code, (ns->logging_enabled, nvme_passthru_err_log_enabled_store() and nvme_passthru_err_log_enabled_show()) Update the comment with right function names and a comment ns->head->passthru_err_log_enabled, nvme_io_passthru_err_log_enabled_store() and nvme_io_passthru_err_log_enabled_show(). Signed-off-by: Chaitanya Kulkarni Reviewed-by: Alan Adamson Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 246e92c05aac4..60537c9224bf9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3761,8 +3761,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info) /* * Set ns->disk->device->driver_data to ns so we can access - * ns->logging_enabled in nvme_passthru_err_log_enabled_store() and - * nvme_passthru_err_log_enabled_show(). + * ns->head->passthru_err_log_enabled in + * nvme_io_passthru_err_log_enabled_[store | show](). */ dev_set_drvdata(disk_to_dev(ns->disk), ns); -- GitLab From b5d1b4b46f856da1473c7ba9a5cdfcb55c9b2478 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:40:37 +0300 Subject: [PATCH 0770/1405] PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "msg_addr" variable is u64. However, the "aligned_offset" is an unsigned int. This means that when the code does: msg_addr &= ~aligned_offset; it will unintentionally zero out the high 32 bits. Use ALIGN_DOWN() to do the alignment instead. Fixes: 2217fffcd63f ("PCI: dwc: endpoint: Fix dw_pcie_ep_raise_msix_irq() alignment support") Link: https://lore.kernel.org/r/af59c7ad-ab93-40f7-ad4a-7ac0b14d37f5@moroto.mountain Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam Cc: --- drivers/pci/controller/dwc/pcie-designware-ep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 5befed2dc02b7..d6b66597101e4 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -6,6 +6,7 @@ * Author: Kishon Vijay Abraham I */ +#include #include #include #include @@ -551,7 +552,7 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no, } aligned_offset = msg_addr & (epc->mem->window.page_size - 1); - msg_addr &= ~aligned_offset; + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) -- GitLab From 67057f48df79a3d73683385f521215146861684b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 Jan 2024 11:41:01 +0300 Subject: [PATCH 0771/1405] PCI: dwc: Clean up dw_pcie_ep_raise_msi_irq() alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I recently changed the alignment code in dw_pcie_ep_raise_msix_irq(). The code in dw_pcie_ep_raise_msi_irq() is similar, so update it to match, just for consistency. (No effect on runtime, just a cleanup). Link: https://lore.kernel.org/r/184097e0-c728-42c7-9e8a-556bd33fb612@moroto.mountain Signed-off-by: Dan Carpenter Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel Reviewed-by: Ilpo Järvinen Reviewed-by: Manivannan Sadhasivam --- drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index d6b66597101e4..9a437cfce073c 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -483,9 +483,10 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, reg = ep_func->msi_cap + PCI_MSI_DATA_32; msg_data = dw_pcie_ep_readw_dbi(ep, func_no, reg); } - aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1); - msg_addr = ((u64)msg_addr_upper) << 32 | - (msg_addr_lower & ~aligned_offset); + msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower; + + aligned_offset = msg_addr & (epc->mem->window.page_size - 1); + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret) -- GitLab From 46f5ab762d048dad224436978315cbc2fa79c630 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 6 Feb 2024 11:22:09 +0100 Subject: [PATCH 0772/1405] fs: relax mount_setattr() permission checks When we added mount_setattr() I added additional checks compared to the legacy do_reconfigure_mnt() and do_change_type() helpers used by regular mount(2). If that mount had a parent then verify that the caller and the mount namespace the mount is attached to match and if not make sure that it's an anonymous mount. The real rootfs falls into neither category. It is neither an anoymous mount because it is obviously attached to the initial mount namespace but it also obviously doesn't have a parent mount. So that means legacy mount(2) allows changing mount properties on the real rootfs but mount_setattr(2) blocks this. I never thought much about this but of course someone on this planet of earth changes properties on the real rootfs as can be seen in [1]. Since util-linux finally switched to the new mount api in 2.39 not so long ago it also relies on mount_setattr() and that surfaced this issue when Fedora 39 finally switched to it. Fix this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843 Link: https://lore.kernel.org/r/20240206-vfs-mount-rootfs-v1-1-19b335eee133@kernel.org Reviewed-by: Jan Kara Reported-by: Karel Zak Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Christian Brauner --- fs/namespace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 437f60e96d405..5a51315c66781 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) /* * If this is an attached mount make sure it's located in the callers * mount namespace. If it's not don't let the caller interact with it. - * If this is a detached mount make sure it has an anonymous mount - * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE. + * + * If this mount doesn't have a parent it's most often simply a + * detached mount with an anonymous mount namespace. IOW, something + * that's simply not attached yet. But there are apparently also users + * that do change mount properties on the rootfs itself. That obviously + * neither has a parent nor is it a detached mount so we cannot + * unconditionally check for detached mounts. */ - if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns))) + if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) goto out; /* -- GitLab From 36fa8d697132b4bed2312d700310e8a78b000c84 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 1 Feb 2024 22:58:36 +0100 Subject: [PATCH 0773/1405] netfilter: nft_compat: narrow down revision to unsigned 8-bits xt_find_revision() expects u8, restrict it to this datatype. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f0eeda97bfcd9..001b6841a4b67 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, }; @@ -419,7 +419,7 @@ static void nft_match_eval(const struct nft_expr *expr, static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, }; @@ -724,7 +724,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb, static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, .len = NFT_COMPAT_NAME_MAX-1 }, - [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, }; -- GitLab From 292781c3c5485ce33bd22b2ef1b2bed709b4d672 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 1 Feb 2024 23:33:29 +0100 Subject: [PATCH 0774/1405] netfilter: nft_compat: reject unused compat flag Flag (1 << 0) is ignored is set, never used, reject it it with EINVAL instead. Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_compat.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ca30232b7bc8a..117c6a9b845b1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -285,9 +285,11 @@ enum nft_rule_attributes { /** * enum nft_rule_compat_flags - nf_tables rule compat flags * + * @NFT_RULE_COMPAT_F_UNUSED: unused * @NFT_RULE_COMPAT_F_INV: invert the check result */ enum nft_rule_compat_flags { + NFT_RULE_COMPAT_F_UNUSED = (1 << 0), NFT_RULE_COMPAT_F_INV = (1 << 1), NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV, }; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 001b6841a4b67..ed71d5ecbe0a5 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -212,7 +212,8 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; -- GitLab From d694b754894c93fb4d71a7f3699439dec111decc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Feb 2024 00:05:23 +0100 Subject: [PATCH 0775/1405] netfilter: nft_compat: restrict match/target protocol to u16 xt_check_{match,target} expects u16, but NFTA_RULE_COMPAT_PROTO is u32. NLA_POLICY_MAX(NLA_BE32, 65535) cannot be used because .max in nla_policy is s16, see 3e48be05f3c7 ("netlink: add attribute range validation to policy"). Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index ed71d5ecbe0a5..1f9474fefe849 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -218,7 +219,12 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } -- GitLab From e96fddb32931d007db12b1fce9b5e8e4c080401b Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 27 Jan 2024 18:34:01 +0530 Subject: [PATCH 0776/1405] drm/amd/display: Fix 'panel_cntl' could be null in 'dcn21_set_backlight_level()' 'panel_cntl' structure used to control the display panel could be null, dereferencing it could lead to a null pointer access. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn21/dcn21_hwseq.c:269 dcn21_set_backlight_level() error: we previously assumed 'panel_cntl' could be null (see line 250) Fixes: 474ac4a875ca ("drm/amd/display: Implement some asic specific abm call backs.") Cc: Yongqiang Sun Cc: Anthony Koo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Anthony Koo Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 8e88dcaf88f5b..a9cd39f773604 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -237,34 +237,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, { struct dc_context *dc = pipe_ctx->stream->ctx; struct abm *abm = pipe_ctx->stream_res.abm; + struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; + uint32_t otg_inst; + + if (!abm && !tg && !panel_cntl) + return false; + + otg_inst = tg->inst; if (dc->dc->res_pool->dmcu) { dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp); return true; } - if (abm != NULL) { - uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; - - if (abm && panel_cntl) { - if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, - otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } else { - dmub_abm_set_pipe(abm, - otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } - } + if (abm->funcs && abm->funcs->set_pipe_ex) { + abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); + } else { + dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } - if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm) + if (abm->funcs && abm->funcs->set_backlight_level_pwm) abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16, frame_ramp, 0, panel_cntl->inst); else -- GitLab From 66951d98d9bf45ba25acf37fe0747253fafdf298 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 Jan 2024 08:49:41 +0530 Subject: [PATCH 0777/1405] drm/amd/display: Add NULL test for 'timing generator' in 'dcn21_set_pipe()' In "u32 otg_inst = pipe_ctx->stream_res.tg->inst;" pipe_ctx->stream_res.tg could be NULL, it is relying on the caller to ensure the tg is not NULL. Fixes: 474ac4a875ca ("drm/amd/display: Implement some asic specific abm call backs.") Cc: Yongqiang Sun Cc: Anthony Koo Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Anthony Koo Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index a9cd39f773604..5c7f380a84f91 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -206,28 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) { struct abm *abm = pipe_ctx->stream_res.abm; - uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; + struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; + uint32_t otg_inst; + + if (!abm && !tg && !panel_cntl) + return; + + otg_inst = tg->inst; if (dmcu) { dce110_set_pipe(pipe_ctx); return; } - if (abm && panel_cntl) { - if (abm->funcs && abm->funcs->set_pipe_ex) { - abm->funcs->set_pipe_ex(abm, + if (abm->funcs && abm->funcs->set_pipe_ex) { + abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst, panel_cntl->pwrseq_inst); - } else { - dmub_abm_set_pipe(abm, otg_inst, - SET_ABM_PIPE_NORMAL, - panel_cntl->inst, - panel_cntl->pwrseq_inst); - } + } else { + dmub_abm_set_pipe(abm, otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); } } -- GitLab From 2103370afba74dda39ff5d2d69163c86644ce528 Mon Sep 17 00:00:00 2001 From: Wenjing Liu Date: Thu, 18 Jan 2024 18:12:15 -0500 Subject: [PATCH 0778/1405] drm/amd/display: set odm_combine_policy based on context in dcn32 resource [why] When populating dml pipes, odm combine policy should be assigned based on the pipe topology of the context passed in. DML pipes could be repopulated multiple times during single validate bandwidth attempt. We need to make sure that whenever we repopulate the dml pipes it is always aligned with the updated context. There is a case where DML pipes get repopulated during FPO optimization after ODM combine policy is changed. Since in the current code we reinitlaize ODM combine policy, even though the current context has ODM combine enabled, we overwrite it despite the pipes are already split. This causes DML to think that MPC combine is used so we mistakenly enable MPC combine because we apply pipe split with ODM combine policy reset. This issue doesn't impact non windowed MPO with ODM case because the legacy policy has restricted use cases. We don't encounter the case where both ODM and FPO optimizations are enabled together. So we decide to leave it as is because it is about to be replaced anyway. Cc: stable@vger.kernel.org # 6.6+ Reviewed-by: Chaitanya Dhere Reviewed-by: Alvin Lee Acked-by: Hamza Mahfooz Signed-off-by: Wenjing Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 15 ++++++++++---- drivers/gpu/drm/amd/display/dc/inc/resource.h | 20 ++++++++----------- .../dc/resource/dcn32/dcn32_resource.c | 16 ++++++++++++++- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index dd781a20692ee..ba76dd4a2ce29 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1288,7 +1288,7 @@ static bool update_pipes_with_split_flags(struct dc *dc, struct dc_state *contex return updated; } -static bool should_allow_odm_power_optimization(struct dc *dc, +static bool should_apply_odm_power_optimization(struct dc *dc, struct dc_state *context, struct vba_vars_st *v, int *split, bool *merge) { @@ -1392,9 +1392,12 @@ static void try_odm_power_optimization_and_revalidate( { int i; unsigned int new_vlevel; + unsigned int cur_policy[MAX_PIPES]; - for (i = 0; i < pipe_cnt; i++) + for (i = 0; i < pipe_cnt; i++) { + cur_policy[i] = pipes[i].pipe.dest.odm_combine_policy; pipes[i].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } new_vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); @@ -1403,6 +1406,9 @@ static void try_odm_power_optimization_and_revalidate( memset(merge, 0, MAX_PIPES * sizeof(bool)); *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, new_vlevel, split, merge); context->bw_ctx.dml.vba.VoltageLevel = *vlevel; + } else { + for (i = 0; i < pipe_cnt; i++) + pipes[i].pipe.dest.odm_combine_policy = cur_policy[i]; } } @@ -1580,7 +1586,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, } } - if (should_allow_odm_power_optimization(dc, context, vba, split, merge)) + if (should_apply_odm_power_optimization(dc, context, vba, split, merge)) try_odm_power_optimization_and_revalidate( dc, context, pipes, split, merge, vlevel, *pipe_cnt); @@ -2209,7 +2215,8 @@ bool dcn32_internal_validate_bw(struct dc *dc, int i; pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); + if (!dc->config.enable_windowed_mpo_odm) + dcn32_update_dml_pipes_odm_policy_based_on_context(dc, context, pipes); /* repopulate_pipes = 1 means the pipes were either split or merged. In this case * we have to re-calculate the DET allocation and run through DML once more to diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index c958ef37b78a6..77a60aa9f27bb 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -427,22 +427,18 @@ struct pipe_ctx *resource_get_primary_dpp_pipe(const struct pipe_ctx *dpp_pipe); int resource_get_mpc_slice_index(const struct pipe_ctx *dpp_pipe); /* - * Get number of MPC "cuts" of the plane associated with the pipe. MPC slice - * count is equal to MPC splits + 1. For example if a plane is cut 3 times, it - * will have 4 pieces of slice. - * return - 0 if pipe is not used for a plane with MPCC combine. otherwise - * the number of MPC "cuts" for the plane. + * Get the number of MPC slices associated with the pipe. + * The function returns 0 if the pipe is not associated with an MPC combine + * pipe topology. */ -int resource_get_mpc_slice_count(const struct pipe_ctx *opp_head); +int resource_get_mpc_slice_count(const struct pipe_ctx *pipe); /* - * Get number of ODM "cuts" of the timing associated with the pipe. ODM slice - * count is equal to ODM splits + 1. For example if a timing is cut 3 times, it - * will have 4 pieces of slice. - * return - 0 if pipe is not used for ODM combine. otherwise - * the number of ODM "cuts" for the timing. + * Get the number of ODM slices associated with the pipe. + * The function returns 0 if the pipe is not associated with an ODM combine + * pipe topology. */ -int resource_get_odm_slice_count(const struct pipe_ctx *otg_master); +int resource_get_odm_slice_count(const struct pipe_ctx *pipe); /* Get the ODM slice index counting from 0 from left most slice */ int resource_get_odm_slice_index(const struct pipe_ctx *opp_head); diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index c4d71e7f18af4..6f10052caeef0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1829,7 +1829,21 @@ int dcn32_populate_dml_pipes_from_context( dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + if (dc->config.enable_windowed_mpo_odm && + dc->debug.enable_single_display_2to1_odm_policy) { + switch (resource_get_odm_slice_count(pipe)) { + case 2: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + break; + case 4: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_4to1; + break; + default: + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + } + } else { + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + } pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19; -- GitLab From 93bafa32a6918154aa0caf9f66679a32c2431357 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 16 Jan 2024 19:10:45 +0800 Subject: [PATCH 0779/1405] drm/amdgpu: skip to program GFXDEC registers for suspend abort In the suspend abort cases, the gfx power rail doesn't turn off so some GFXDEC registers/CSB can't reset to default value and at this moment reinitialize GFXDEC/CSB will result in an unexpected error. So let skip those program sequence for the suspend abort case. Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3d8a48f46b015..6dce81a061ab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1078,6 +1078,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 971acf01bea60..211501ea91694 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2476,6 +2476,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2490,6 +2491,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 69c5009107460..3bc6943365a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3034,6 +3034,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); -- GitLab From 6ef82ac664bb9568ca3956e0d9c9c478e25077ff Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Wed, 17 Jan 2024 13:39:37 +0800 Subject: [PATCH 0780/1405] drm/amdgpu: reset gpu for s3 suspend abort case In the s3 suspend abort case some type of gfx9 power rail not turn off from FCH side and this will put the GPU in an unknown power status, so let's reset the gpu to a known good power state before reinitialize gpu device. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 15033efec2bac..c64c01e2944a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1298,10 +1298,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } -- GitLab From 897925dcc5dfff5b3b23ba991a89fe3ebaca6ef8 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Thu, 1 Feb 2024 19:31:06 +0800 Subject: [PATCH 0781/1405] drm/amdgpu: remove asymmetrical irq disabling in jpeg 4.0.5 suspend A supplement to commit: 615dd56ac5379f4239940be69139a33e79e59c67 There is an irq warning of jpeg during resume in s2idle process. No irq enabled in jpeg 4.0.5 resume. Fixes: 615dd56ac537 ("drm/amdgpu: remove asymmetrical irq disabling in vcn 4.0.5 suspend") Signed-off-by: Li Ma Acked-By: Saleemkhan Jamadar Reviewed-by: Yifan Zhang Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 9 --------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 10 ---------- 2 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index bc38b90f8cf88..88ea58d5c4abf 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = { - .set = jpeg_v4_0_set_interrupt_state, .process = jpeg_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 6ede85b28cc8c..78b74daf4eebf 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -181,7 +181,6 @@ static int jpeg_v4_0_5_hw_fini(void *handle) RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } - amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); return 0; } @@ -516,14 +515,6 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -603,7 +594,6 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { - .set = jpeg_v4_0_5_set_interrupt_state, .process = jpeg_v4_0_5_process_interrupt, }; -- GitLab From 280df4996c2bfc0e340ae758ab6da35748853a7e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 23 Jan 2024 12:20:06 -0500 Subject: [PATCH 0782/1405] drm/amd/display: Increase eval/entry delay for DCN35 [Why] To match firmware measurements and avoid hanging when accessing HW that's in idle. [How] Increase the delays to what we've measured. Reviewed-by: Ovidiu Bunea Acked-by: Hamza Mahfooz Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 761ec98918756..2142cee6cac23 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -780,8 +780,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 200, - .ips2_entry_delay_us = 400, + .ips2_eval_delay_us = 1650, + .ips2_entry_delay_us = 800, .static_screen_wait_frames = 2, }; -- GitLab From 2dcf82a8e8dc930655787797ef8a3692b527c7a9 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Mon, 5 Feb 2024 15:55:48 +0800 Subject: [PATCH 0783/1405] drm/amdgpu: Fix shared buff copy to user ta if invoke node buffer |-------- ta type ----------| |-------- ta id ----------| |-------- cmd id ----------| |------ shared buf len -----| |------ shared buffer ------| ta if invoke node buffer is as above, copy shared buffer data to correct location Signed-off-by: Stanley.Yang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 468a67b302d4c..ca5c86e5f7cd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size } } - if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; err_free_shared_buf: -- GitLab From e6a7df96facdcf5b1f71eb3ec26f2f9f6ad61e57 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 22 Jan 2024 13:43:46 -0500 Subject: [PATCH 0784/1405] drm/amd/display: Fix MST Null Ptr for RV The change try to fix below error specific to RV platform: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 4 PID: 917 Comm: sway Not tainted 6.3.9-arch1-1 #1 124dc55df4f5272ccb409f39ef4872fc2b3376a2 Hardware name: LENOVO 20NKS01Y00/20NKS01Y00, BIOS R12ET61W(1.31 ) 07/28/2022 RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 Call Trace: ? __die+0x23/0x70 ? page_fault_oops+0x171/0x4e0 ? plist_add+0xbe/0x100 ? exc_page_fault+0x7c/0x180 ? asm_exc_page_fault+0x26/0x30 ? drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] ? drm_dp_atomic_find_time_slots+0x28/0x260 [drm_display_helper 0e67723696438d8e02b741593dd50d80b44c2026] compute_mst_dsc_configs_for_link+0x2ff/0xa40 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] ? fill_plane_buffer_attributes+0x419/0x510 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] amdgpu_dm_atomic_check+0xecd/0x1190 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] drm_atomic_check_only+0x5c5/0xa40 drm_mode_atomic_ioctl+0x76e/0xbc0 ? _copy_to_user+0x25/0x30 ? drm_ioctl+0x296/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 drm_ioctl_kernel+0xcd/0x170 drm_ioctl+0x26d/0x4b0 ? __pfx_drm_mode_atomic_ioctl+0x10/0x10 amdgpu_drm_ioctl+0x4e/0x90 [amdgpu 62e600d2a75e9158e1cd0a243bdc8e6da040c054] __x64_sys_ioctl+0x94/0xd0 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6c/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f4dad17f76f Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c> RSP: 002b:00007ffd9ae859f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 000055e255a55900 RCX: 00007f4dad17f76f RDX: 00007ffd9ae85a90 RSI: 00000000c03864bc RDI: 000000000000000b RBP: 00007ffd9ae85a90 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c03864bc R13: 000000000000000b R14: 000055e255a7fc60 R15: 000055e255a01eb0 Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device ccm cmac algif_hash algif_skcipher af_alg joydev mousedev bnep > typec libphy k10temp ipmi_msghandler roles i2c_scmi acpi_cpufreq mac_hid nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_mas> CR2: 0000000000000008 ---[ end trace 0000000000000000 ]--- RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8> RSP: 0018:ffff960cc2df77d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8afb87e81280 RCX: 0000000000000224 RDX: ffff8afb9ee37c00 RSI: ffff8afb8da1a578 RDI: ffff8afb87e81280 RBP: ffff8afb83d67000 R08: 0000000000000001 R09: ffff8afb9652f850 R10: ffff960cc2df7908 R11: 0000000000000002 R12: 0000000000000000 R13: ffff8afb8d7688a0 R14: ffff8afb8da1a578 R15: 0000000000000224 FS: 00007f4dac35ce00(0000) GS:ffff8afe30b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 000000010ddc6000 CR4: 00000000003506e0 With a second DP monitor connected, drm_atomic_state in dm atomic check sequence does not include the connector state for the old/existing/first DP monitor. In such case, dsc determination policy would hit a null ptr when it tries to iterate the old/existing stream that does not have a valid connector state attached to it. When that happens, dm atomic check should call drm_atomic_get_connector_state for a new connector state. Existing dm has already done that, except for RV due to it does not have official support of dsc where .num_dsc is not defined in dcn10 resource cap, that prevent from getting drm_atomic_get_connector_state called. So, skip dsc determination policy for ASICs that don't have DSC support. Cc: stable@vger.kernel.org # 6.1+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314 Reviewed-by: Wayne Lin Acked-by: Hamza Mahfooz Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d292f290cd6eb..59d2eee72a329 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10731,11 +10731,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } - ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); - if (ret) { - DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; - goto fail; + if (dc_resource_is_dsc_encoding_supported(dc)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { + DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); + ret = -EINVAL; + goto fail; + } } ret = dm_update_mst_vcpi_slots_for_dsc(state, dm_state->context, vars); -- GitLab From ca8179ba11f211cdcb6c12ddd83814eaec999738 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 26 Jan 2024 16:47:20 -0500 Subject: [PATCH 0785/1405] drm/amd/display: Update phantom pipe enable / disable sequence Previously we would call apply_ctx_to_hw to enable and disable phantom pipes. However, apply_ctx_to_hw can potentially update non-phantom pipes as well which is undesired. Instead of calling apply_ctx_to_hw as a whole, call the relevant helpers for each phantom pipe when enabling / disabling which will avoid us modifying hardware state for non-phantom pipes unknowingly. The use case is for an FRL display where FRL_Update is requested by the display. In this case link_state_valid flag is cleared in a passive callback thread and should be handled in the next stream / link update. However, due to the call to apply_ctx_to_hw for the phantom pipes during a flip, the main pipes were modified outside of the desired sequence (driver does not handle link_state_valid = 0 on flips). Cc: stable@vger.kernel.org # 6.6+ Reviewed-by: Samson Tam Acked-by: Hamza Mahfooz Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +- .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 4 +- .../amd/display/dc/hwss/dce110/dce110_hwseq.h | 4 + .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 2 +- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.h | 4 + .../amd/display/dc/hwss/dcn32/dcn32_hwseq.c | 76 ++++++++++++++++--- .../amd/display/dc/hwss/dcn32/dcn32_hwseq.h | 2 + .../amd/display/dc/hwss/dcn32/dcn32_init.c | 3 + .../drm/amd/display/dc/hwss/hw_sequencer.h | 1 + .../display/dc/hwss/hw_sequencer_private.h | 7 ++ 10 files changed, 94 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index aa7c02ba948e9..2c424e435962d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3817,7 +3817,9 @@ static void commit_planes_for_stream(struct dc *dc, * programming has completed (we turn on phantom OTG in order * to complete the plane disable for phantom pipes). */ - dc->hwss.apply_ctx_to_hw(dc, context); + + if (dc->hwss.disable_phantom_streams) + dc->hwss.disable_phantom_streams(dc, context); } if (update_type != UPDATE_TYPE_FAST) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 2352428bcea3c..01493c49bd7a0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1476,7 +1476,7 @@ static enum dc_status dce110_enable_stream_timing( return DC_OK; } -static enum dc_status apply_single_controller_ctx_to_hw( +enum dc_status dce110_apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -2302,7 +2302,7 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe) continue; - status = apply_single_controller_ctx_to_hw( + status = dce110_apply_single_controller_ctx_to_hw( pipe_ctx, context, dc); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h index 08028a1779ae8..ed3cc3648e8e2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.h @@ -39,6 +39,10 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, struct dc_state *context); +enum dc_status dce110_apply_single_controller_ctx_to_hw( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc); void dce110_enable_stream(struct pipe_ctx *pipe_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index 4853ecac53f91..931ac8ed7069d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -2561,7 +2561,7 @@ void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx) tg->funcs->setup_vertical_interrupt2(tg, start_line); } -static void dcn20_reset_back_end_for_pipe( +void dcn20_reset_back_end_for_pipe( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h index b94c85340abff..d950b3e54ec2c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.h @@ -84,6 +84,10 @@ enum dc_status dcn20_enable_stream_timing( void dcn20_disable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn20_enable_stream_gating(struct dc *dc, struct pipe_ctx *pipe_ctx); void dcn20_setup_vupdate_interrupt(struct dc *dc, struct pipe_ctx *pipe_ctx); +void dcn20_reset_back_end_for_pipe( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); void dcn20_init_blank( struct dc *dc, struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index 6c9299c7683df..aa36d7a56ca8c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -1474,9 +1474,44 @@ void dcn32_update_dsc_pg(struct dc *dc, } } +void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context) +{ + struct dce_hwseq *hws = dc->hwseq; + int i; + + for (i = dc->res_pool->pipe_count - 1; i >= 0 ; i--) { + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (dc_state_get_pipe_subvp_type(dc->current_state, pipe_ctx_old) != SUBVP_PHANTOM) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || pipe_need_reprogram(pipe_ctx_old, pipe_ctx) || + (pipe_ctx->stream && dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM)) { + struct clock_source *old_clk = pipe_ctx_old->clock_source; + + if (hws->funcs.reset_back_end_for_pipe) + hws->funcs.reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (hws->funcs.enable_stream_gating) + hws->funcs.enable_stream_gating(dc, pipe_ctx_old); + if (old_clk) + old_clk->funcs->cs_power_down(old_clk); + } + } +} + void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) { unsigned int i; + enum dc_status status = DC_OK; + struct dce_hwseq *hws = dc->hwseq; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; @@ -1497,16 +1532,39 @@ void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context) } } for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; - - if (new_pipe->stream && dc_state_get_pipe_subvp_type(context, new_pipe) == SUBVP_PHANTOM) { - // If old context or new context has phantom pipes, apply - // the phantom timings now. We can't change the phantom - // pipe configuration safely without driver acquiring - // the DMCUB lock first. - dc->hwss.apply_ctx_to_hw(dc, context); - break; + struct pipe_ctx *pipe_ctx_old = + &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream == NULL) + continue; + + if (dc_state_get_pipe_subvp_type(context, pipe_ctx) != SUBVP_PHANTOM) + continue; + + if (pipe_ctx->stream == pipe_ctx_old->stream && + pipe_ctx->stream->link->link_state_valid) { + continue; } + + if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) + continue; + + if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe) + continue; + + if (hws->funcs.apply_single_controller_ctx_to_hw) + status = hws->funcs.apply_single_controller_ctx_to_hw( + pipe_ctx, + context, + dc); + + ASSERT(status == DC_OK); + +#ifdef CONFIG_DRM_AMD_DC_FP + if (hws->funcs.resync_fifo_dccg_dio) + hws->funcs.resync_fifo_dccg_dio(hws, dc, context); +#endif } } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h index cecf7f0f56719..069e20bc87c0a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.h @@ -111,6 +111,8 @@ void dcn32_update_dsc_pg(struct dc *dc, void dcn32_enable_phantom_streams(struct dc *dc, struct dc_state *context); +void dcn32_disable_phantom_streams(struct dc *dc, struct dc_state *context); + void dcn32_init_blank( struct dc *dc, struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 427cfc8c24a4b..e8ac94a005b83 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -109,6 +109,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .get_dcc_en_bits = dcn10_get_dcc_en_bits, .commit_subvp_config = dcn32_commit_subvp_config, .enable_phantom_streams = dcn32_enable_phantom_streams, + .disable_phantom_streams = dcn32_disable_phantom_streams, .subvp_pipe_control_lock = dcn32_subvp_pipe_control_lock, .update_visual_confirm_color = dcn10_update_visual_confirm_color, .subvp_pipe_control_lock_fast = dcn32_subvp_pipe_control_lock_fast, @@ -159,6 +160,8 @@ static const struct hwseq_private_funcs dcn32_private_funcs = { .set_pixels_per_cycle = dcn32_set_pixels_per_cycle, .resync_fifo_dccg_dio = dcn32_resync_fifo_dccg_dio, .is_dp_dig_pixel_rate_div_policy = dcn32_is_dp_dig_pixel_rate_div_policy, + .apply_single_controller_ctx_to_hw = dce110_apply_single_controller_ctx_to_hw, + .reset_back_end_for_pipe = dcn20_reset_back_end_for_pipe, }; void dcn32_hw_sequencer_init_functions(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index a543993833181..64ca7c66509b7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -379,6 +379,7 @@ struct hw_sequencer_funcs { struct dc_cursor_attributes *cursor_attr); void (*commit_subvp_config)(struct dc *dc, struct dc_state *context); void (*enable_phantom_streams)(struct dc *dc, struct dc_state *context); + void (*disable_phantom_streams)(struct dc *dc, struct dc_state *context); void (*subvp_pipe_control_lock)(struct dc *dc, struct dc_state *context, bool lock, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h index 6137cf09aa54d..b3c62a82cb1cf 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer_private.h @@ -165,8 +165,15 @@ struct hwseq_private_funcs { void (*set_pixels_per_cycle)(struct pipe_ctx *pipe_ctx); void (*resync_fifo_dccg_dio)(struct dce_hwseq *hws, struct dc *dc, struct dc_state *context); + enum dc_status (*apply_single_controller_ctx_to_hw)( + struct pipe_ctx *pipe_ctx, + struct dc_state *context, + struct dc *dc); bool (*is_dp_dig_pixel_rate_div_policy)(struct pipe_ctx *pipe_ctx); #endif + void (*reset_back_end_for_pipe)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); }; struct dce_hwseq { -- GitLab From 29c5da1a124671caa87c4a936c625432c16ad8ca Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 11 Jan 2024 14:25:27 -0700 Subject: [PATCH 0786/1405] drm/amd/display: Disable ODM by default for DCN35 Just ensure that ODM optimization is disabled by default. Acked-by: Hamza Mahfooz Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 2142cee6cac23..1c3d89264ef72 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -2130,6 +2130,7 @@ static bool dcn35_resource_construct( dc->dml2_options.dcn_pipe_count = pool->base.pipe_count; dc->dml2_options.use_native_pstate_optimization = true; dc->dml2_options.use_native_soc_bb_construction = true; + dc->dml2_options.minimize_dispclk_using_odm = false; if (dc->config.EnableMinDispClkODM) dc->dml2_options.minimize_dispclk_using_odm = true; dc->dml2_options.enable_windowed_mpo_odm = dc->config.enable_windowed_mpo_odm; -- GitLab From 55173942a63668bdc1d61812c7c9e0406aefb5bf Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 31 Jan 2024 16:34:32 +0530 Subject: [PATCH 0787/1405] drm/amdgpu: Avoid fetching VRAM vendor info The present way to fetch VRAM vendor information turns out to be not reliable on GFX 9.4.3 dGPUs as well. Avoid using the data. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 40a00ea0009f6..e67a62db9e126 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1947,14 +1947,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { - static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; - u32 vram_info; - - /* Only for dGPU, vendor informaton is reliable */ - if (!amdgpu_sriov_vf(adev) && !(adev->flags & AMD_IS_APU)) { - vram_info = RREG32(regBIF_BIOS_SCRATCH_4); - adev->gmc.vram_vendor = vram_info & 0xF; - } adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; } -- GitLab From da48914e1fcdbf57f6b95d4552fcc088e6547ce4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 2 Feb 2024 18:30:59 -0600 Subject: [PATCH 0788/1405] drm/amd/display: Clear phantom stream count and plane count When dc_state_destruct() was refactored the new phantom_stream_count and phantom_plane_count members weren't cleared. Fixes: 012a04b1d6af ("drm/amd/display: Refactor phantom resource allocation") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 88c6436b28b69..180ac47868c22 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -291,11 +291,14 @@ void dc_state_destruct(struct dc_state *state) dc_stream_release(state->phantom_streams[i]); state->phantom_streams[i] = NULL; } + state->phantom_stream_count = 0; for (i = 0; i < state->phantom_plane_count; i++) { dc_plane_state_release(state->phantom_planes[i]); state->phantom_planes[i] = NULL; } + state->phantom_plane_count = 0; + state->stream_mask = 0; memset(&state->res_ctx, 0, sizeof(state->res_ctx)); memset(&state->pp_display_cfg, 0, sizeof(state->pp_display_cfg)); -- GitLab From e63e35f0164c43fbc1adb481d6604f253b9f9667 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 5 Feb 2024 14:54:05 -0700 Subject: [PATCH 0789/1405] drm/amd/display: Increase frame-larger-than for all display_mode_vba files After a recent change in LLVM, allmodconfig (which has CONFIG_KCSAN=y and CONFIG_WERROR=y enabled) has a few new instances of -Wframe-larger-than for the mode support and system configuration functions: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20v2.c:3393:6: error: stack frame size (2144) exceeds limit (2048) in 'dml20v2_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3393 | void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn21/display_mode_vba_21.c:3520:6: error: stack frame size (2192) exceeds limit (2048) in 'dml21_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3520 | void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/display_mode_vba_20.c:3286:6: error: stack frame size (2128) exceeds limit (2048) in 'dml20_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than] 3286 | void dml20_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) | ^ 1 error generated. Without the sanitizers enabled, there are no warnings. This was the catalyst for commit 6740ec97bcdb ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml2") and that same change was made to dml in commit 5b750b22530f ("drm/amd/display: Increase frame warning limit with KASAN or KCSAN in dml") but the frame_warn_flag variable was not applied to all files. Do so now to clear up the warnings and make all these files consistent. Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issue/1990 Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 6042a5a6a44f8..59ade76ffb18d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -72,11 +72,11 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) -- GitLab From 58fca355ad37dcb5f785d9095db5f748b79c5dc2 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 7 Feb 2024 10:20:57 +0530 Subject: [PATCH 0790/1405] drm/amd/display: Implement bounds check for stream encoder creation in DCN301 'stream_enc_regs' array is an array of dcn10_stream_enc_registers structures. The array is initialized with four elements, corresponding to the four calls to stream_enc_regs() in the array initializer. This means that valid indices for this array are 0, 1, 2, and 3. The error message 'stream_enc_regs' 4 <= 5 below, is indicating that there is an attempt to access this array with an index of 5, which is out of bounds. This could lead to undefined behavior Here, eng_id is used as an index to access the stream_enc_regs array. If eng_id is 5, this would result in an out-of-bounds access on the stream_enc_regs array. Thus fixing Buffer overflow error in dcn301_stream_encoder_create reported by Smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn301/dcn301_resource.c:1011 dcn301_stream_encoder_create() error: buffer overflow 'stream_enc_regs' 4 <= 5 Fixes: 3a83e4e64bb1 ("drm/amd/display: Add dcn3.01 support to DC (v2)") Cc: Roman Li Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 511ff6b5b9856..7538b548c5725 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -999,7 +999,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id vpg = dcn301_vpg_create(ctx, vpg_inst); afmt = dcn301_afmt_create(ctx, afmt_inst); - if (!enc1 || !vpg || !afmt) { + if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { kfree(enc1); kfree(vpg); kfree(afmt); -- GitLab From 534c8a5b9d5d41d30cdcac93cfa1bca5e17be009 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 8 Dec 2023 13:48:22 +0530 Subject: [PATCH 0791/1405] drm/amdgpu: Fix HDP flush for VFs on nbio v7.9 HDP flush remapping is not done for VFs. Keep the original offsets in VF environment. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index e90f337808034..b4723d68eab0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -431,6 +431,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) u32 inst_mask; int i; + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET( + NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) + << 2; WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, 0xff & ~(adev->gfx.xcc_mask)); -- GitLab From 4054705215ad7e6dd8e4e6ff27c39abd7667f700 Mon Sep 17 00:00:00 2001 From: Francis Pravin Date: Wed, 7 Feb 2024 05:04:17 +0530 Subject: [PATCH 0792/1405] nvme: use ns->head->pi_size instead of t10_pi_tuple structure size Currently kernel supports 8 byte and 16 byte protection information. So, use ns->head->pi_size instead of sizeof(struct t10_pi_tuple). Signed-off-by: Francis Pravin Signed-off-by: Sathyavathi M Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 18f5c1be5d67e..3dfd5ae99ae05 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -228,7 +228,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->head->lba_shift; if ((io.control & NVME_RW_PRINFO_PRACT) && - ns->head->ms == sizeof(struct t10_pi_tuple)) { + (ns->head->ms == ns->head->pi_size)) { /* * Protection information is stripped/inserted by the * controller. -- GitLab From a12bc36032a2f7917068f9ce9eb26d869e54b31a Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 2 Feb 2024 16:13:17 +0800 Subject: [PATCH 0793/1405] ksmbd: Add kernel-doc for ksmbd_extract_sharename() function The ksmbd_extract_sharename() function lacked a complete kernel-doc comment. This patch adds parameter descriptions and detailed function behavior to improve code readability and maintainability. Signed-off-by: Yang Li Acked-by: Randy Dunlap Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/misc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/misc.c b/fs/smb/server/misc.c index 9e8afaa686e3a..1a5faa6f6e7bc 100644 --- a/fs/smb/server/misc.c +++ b/fs/smb/server/misc.c @@ -261,6 +261,7 @@ char *ksmbd_casefold_sharename(struct unicode_map *um, const char *name) /** * ksmbd_extract_sharename() - get share name from tree connect request + * @um: pointer to a unicode_map structure for character encoding handling * @treename: buffer containing tree name and share name * * Return: share name on success, otherwise error -- GitLab From 108a020c64434fed4b69762879d78cd24088b4c7 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 5 Feb 2024 14:19:16 +0300 Subject: [PATCH 0794/1405] ksmbd: free aux buffer if ksmbd_iov_pin_rsp_read fails ksmbd_iov_pin_rsp_read() doesn't free the provided aux buffer if it fails. Seems to be the caller's responsibility to clear the buffer in error case. Found by Linux Verification Center (linuxtesting.org). Fixes: e2b76ab8b5c9 ("ksmbd: add support for read compound") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ba7a72a6a4f45..0c97d3c860726 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -6173,8 +6173,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } kvfree(rpc_resp); } else { err = ksmbd_iov_pin_rsp(work, (void *)rsp, @@ -6384,8 +6386,10 @@ int smb2_read(struct ksmbd_work *work) err = ksmbd_iov_pin_rsp_read(work, (void *)rsp, offsetof(struct smb2_read_rsp, Buffer), aux_payload_buf, nbytes); - if (err) + if (err) { + kvfree(aux_payload_buf); goto out; + } ksmbd_fd_put(work, fp); return 0; -- GitLab From e656c7a9e59607d1672d85ffa9a89031876ffe67 Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Tue, 23 Jan 2024 12:04:42 -0800 Subject: [PATCH 0795/1405] mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages are not reserved. However when the shared memory is attached with the shmat() call the hugetlb pages are getting reserved incorrectly for SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. ------------------------------- Following test shows the issue. $cat shmhtb.c int main() { int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; int shmid; shmid = shmget(SKEY, SHMSZ, shmflags); if (shmid < 0) { printf("shmat: shmget() failed, %d\n", errno); return 1; } printf("After shmget()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmat(shmid, NULL, 0); printf("\nAfter shmat()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmctl(shmid, IPC_RMID, NULL); return 0; } #sysctl -w vm.nr_hugepages=20 #./shmhtb After shmget() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 0 HugePages_Surp: 0 After shmat() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 5 <-- HugePages_Surp: 0 -------------------------------- Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared memory in the shmat() call. Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 671664fed3077..ee13c2ca8ad29 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; -- GitLab From daa694e4137571b4ebec330f9a9b4d54aa8b8089 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:50 +0100 Subject: [PATCH 0796/1405] getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand() Patch series "getrusage: use sig->stats_lock", v2. This patch (of 2): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. This is also preparation for the next patch which changes getrusage() to use stats_lock instead of siglock, thread_group_cputime() takes the same lock. With the current implementation recursive read_seqbegin_or_lock() is fine, thread_group_cputime() can't enter the slow mode if the caller holds stats_lock, yet this looks more safe and better performance-wise. Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton --- kernel/sys.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index e219fcfa112d8..70ad06ad852e5 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1785,17 +1785,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; struct signal_struct *sig = p->signal; - memset((char *)r, 0, sizeof (*r)); + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = sig->maxrss; - goto out; + goto out_thread; } if (!lock_task_sighand(p, &flags)) @@ -1819,9 +1821,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; r->ru_nvcsw += sig->nvcsw; r->ru_nivcsw += sig->nivcsw; r->ru_minflt += sig->min_flt; @@ -1839,19 +1838,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) } unlock_task_sighand(p, &flags); -out: - r->ru_utime = ns_to_kernel_old_timeval(utime); - r->ru_stime = ns_to_kernel_old_timeval(stime); + if (who == RUSAGE_CHILDREN) + goto out_children; - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; - if (mm) { - setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); } + +out_children: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ + r->ru_utime = ns_to_kernel_old_timeval(utime); + r->ru_stime = ns_to_kernel_old_timeval(stime); } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) -- GitLab From f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:53 +0100 Subject: [PATCH 0797/1405] getrusage: use sig->stats_lock rather than lock_task_sighand() lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call getrusage() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change getrusage() to use sig->stats_lock, it was specifically designed for this type of use. This way it runs lockless in the likely case. TODO: - Change do_task_stat() to use sig->stats_lock too, then we can remove spin_lock_irq(siglock) in wait_task_zombie(). - Turn sig->stats_lock into seqcount_rwlock_t, this way the readers in the slow mode won't exclude each other. See https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ - stats_lock has to disable irqs because ->siglock can be taken in irq context, it would be very nice to change __exit_signal() to avoid the siglock->stats_lock dependency. Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton --- kernel/sys.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 70ad06ad852e5..f8e543f1e38a0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1788,7 +1788,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long maxrss; struct mm_struct *mm; struct signal_struct *sig = p->signal; + unsigned int seq = 0; +retry: memset(r, 0, sizeof(*r)); utime = stime = 0; maxrss = 0; @@ -1800,8 +1802,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: @@ -1829,14 +1830,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; + + rcu_read_lock(); __for_each_thread(sig, t) accumulate_thread_rusage(t, r); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); + + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); if (who == RUSAGE_CHILDREN) goto out_children; -- GitLab From 60f92acb60a989b14e4b744501a0df0f82ef30a3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:55 +0100 Subject: [PATCH 0798/1405] fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand() Patch series "fs/proc: do_task_stat: use sig->stats_". do_task_stat() has the same problem as getrusage() had before "getrusage: use sig->stats_lock rather than lock_task_sighand()": a hard lockup. If NR_CPUS threads call lock_task_sighand() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. This patch (of 3): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. Not only this removes for_each_thread() from the critical section with irqs disabled, this removes another case when stats_lock is taken with siglock held. We want to remove this dependency, then we can change the users of stats_lock to not disable irqs. Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton --- fs/proc/array.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index ff08a8957552a..45ba918638084 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = utime = stime = 0; + cutime = cstime = 0; cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { @@ -546,7 +546,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime_adjusted(task, &utime, &stime); gtime += sig->gtime; if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) @@ -562,10 +561,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); - if (!whole) { + + if (whole) { + thread_group_cputime_adjusted(task, &utime, &stime); + } else { + task_cputime_adjusted(task, &utime, &stime); min_flt = task->min_flt; maj_flt = task->maj_flt; - task_cputime_adjusted(task, &utime, &stime); gtime = task_gtime(task); } -- GitLab From 7601df8031fd67310af891897ef6cc0df4209305 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:33:57 +0100 Subject: [PATCH 0799/1405] fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call do_task_stat() at the same time and the process has NR_THREADS, it will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change do_task_stat() to use sig->stats_lock to gather the statistics outside of ->siglock protected section, in the likely case this code will run lockless. Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton --- fs/proc/array.c | 58 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 45ba918638084..34a47fb0c57f2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, int permitted; struct mm_struct *mm; unsigned long long start_time; - unsigned long cmin_flt = 0, cmaj_flt = 0; - unsigned long min_flt = 0, maj_flt = 0; - u64 cutime, cstime, utime, stime; - u64 cgtime, gtime; + unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt; + u64 cutime, cstime, cgtime, utime, stime, gtime; unsigned long rsslim = 0; unsigned long flags; int exit_code = task->exit_code; + struct signal_struct *sig = task->signal; + unsigned int seq = 1; state = *get_task_state(task); vsize = eip = esp = 0; @@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = 0; - cgtime = gtime = 0; if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; - if (sig->tty) { struct pid *pgrp = tty_get_pgrp(sig->tty); tty_pgrp = pid_nr_ns(pgrp, ns); @@ -527,27 +523,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, num_threads = get_nr_threads(task); collect_sigign_sigcatch(task, &sigign, &sigcatch); - cmin_flt = sig->cmin_flt; - cmaj_flt = sig->cmaj_flt; - cutime = sig->cutime; - cstime = sig->cstime; - cgtime = sig->cgtime; rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur); - /* add up live thread stats at the group level */ if (whole) { - struct task_struct *t; - - __for_each_thread(sig, t) { - min_flt += t->min_flt; - maj_flt += t->maj_flt; - gtime += task_gtime(t); - } - - min_flt += sig->min_flt; - maj_flt += sig->maj_flt; - gtime += sig->gtime; - if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED)) exit_code = sig->group_exit_code; } @@ -562,6 +540,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (permitted && (!whole || num_threads < 2)) wchan = !task_is_running(task); + do { + seq++; /* 2 on the 1st/lockless path, otherwise odd */ + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); + + cmin_flt = sig->cmin_flt; + cmaj_flt = sig->cmaj_flt; + cutime = sig->cutime; + cstime = sig->cstime; + cgtime = sig->cgtime; + + if (whole) { + struct task_struct *t; + + min_flt = sig->min_flt; + maj_flt = sig->maj_flt; + gtime = sig->gtime; + + rcu_read_lock(); + __for_each_thread(sig, t) { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + gtime += task_gtime(t); + } + rcu_read_unlock(); + } + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); + if (whole) { thread_group_cputime_adjusted(task, &utime, &stime); } else { -- GitLab From c1be35a16b2f1fe21f4f26f9de030ad6eaaf6a25 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 23 Jan 2024 16:34:00 +0100 Subject: [PATCH 0800/1405] exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock) After the recent changes nobody use siglock to read the values protected by stats_lock, we can kill spin_lock_irq(¤t->sighand->siglock) and update the comment. With this patch only __exit_signal() and thread_group_start_cputime() take stats_lock under siglock. Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com Signed-off-by: Oleg Nesterov Signed-off-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton --- kernel/exit.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 3988a02efaef0..dfb963d2f862a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1127,17 +1127,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * and nobody can change them. * * psig->stats_lock also protects us from our sub-threads - * which can reap other children at the same time. Until - * we change k_getrusage()-like users to rely on this lock - * we have to take ->siglock as well. + * which can reap other children at the same time. * * We use thread_group_cputime_adjusted() to get times for * the thread group, which consolidates times for all threads * in the group including the group leader. */ thread_group_cputime_adjusted(p, &tgutime, &tgstime); - spin_lock_irq(¤t->sighand->siglock); - write_seqlock(&psig->stats_lock); + write_seqlock_irq(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; @@ -1160,8 +1157,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); - write_sequnlock(&psig->stats_lock); - spin_unlock_irq(¤t->sighand->siglock); + write_sequnlock_irq(&psig->stats_lock); } if (wo->wo_rusage) -- GitLab From 56ae10cf628b02279980d17439c6241a643959c2 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Tue, 23 Jan 2024 14:17:55 +0000 Subject: [PATCH 0801/1405] mm/userfaultfd: UFFDIO_MOVE implementation should use ptep_get() Commit c33c794828f2 ("mm: ptep_get() conversion") converted all (non-arch) call sites to use ptep_get() instead of doing a direct dereference of the pte. Full rationale can be found in that commit's log. Since then, UFFDIO_MOVE has been implemented which does 7 direct pte dereferences. Let's fix those up to use ptep_get(). I've asserted in the past that there is no reliable automated mechanism to catch these; I'm relying on a combination of Coccinelle (which throws up a lot of false positives) and some compiler magic to force a compiler error on dereference. But given the frequency with which new issues are coming up, I'll add it to my todo list to try to find an automated solution. Link: https://lkml.kernel.org/r/20240123141755.3836179-1-ryan.roberts@arm.com Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") Signed-off-by: Ryan Roberts Reviewed-by: Suren Baghdasaryan Cc: Andrea Arcangeli Cc: David Hildenbrand Signed-off-by: Andrew Morton --- mm/userfaultfd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 75fcf1f783bc5..7cf7d43842590 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -902,8 +902,8 @@ static int move_present_pte(struct mm_struct *mm, double_pt_lock(dst_ptl, src_ptl); - if (!pte_same(*src_pte, orig_src_pte) || - !pte_same(*dst_pte, orig_dst_pte)) { + if (!pte_same(ptep_get(src_pte), orig_src_pte) || + !pte_same(ptep_get(dst_pte), orig_dst_pte)) { err = -EAGAIN; goto out; } @@ -946,8 +946,8 @@ static int move_swap_pte(struct mm_struct *mm, double_pt_lock(dst_ptl, src_ptl); - if (!pte_same(*src_pte, orig_src_pte) || - !pte_same(*dst_pte, orig_dst_pte)) { + if (!pte_same(ptep_get(src_pte), orig_src_pte) || + !pte_same(ptep_get(dst_pte), orig_dst_pte)) { double_pt_unlock(dst_ptl, src_ptl); return -EAGAIN; } @@ -1016,7 +1016,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, } spin_lock(dst_ptl); - orig_dst_pte = *dst_pte; + orig_dst_pte = ptep_get(dst_pte); spin_unlock(dst_ptl); if (!pte_none(orig_dst_pte)) { err = -EEXIST; @@ -1024,7 +1024,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, } spin_lock(src_ptl); - orig_src_pte = *src_pte; + orig_src_pte = ptep_get(src_pte); spin_unlock(src_ptl); if (pte_none(orig_src_pte)) { if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES)) @@ -1054,7 +1054,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, * page isn't freed under us */ spin_lock(src_ptl); - if (!pte_same(orig_src_pte, *src_pte)) { + if (!pte_same(orig_src_pte, ptep_get(src_pte))) { spin_unlock(src_ptl); err = -EAGAIN; goto out; -- GitLab From 67b8bcbaed4777871bb0dcc888fb02a614a98ab1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 24 Jan 2024 21:19:36 +0900 Subject: [PATCH 0802/1405] nilfs2: fix data corruption in dsync block recovery for small block sizes The helper function nilfs_recovery_copy_block() of nilfs_recovery_dsync_blocks(), which recovers data from logs created by data sync writes during a mount after an unclean shutdown, incorrectly calculates the on-page offset when copying repair data to the file's page cache. In environments where the block size is smaller than the page size, this flaw can cause data corruption and leak uninitialized memory bytes during the recovery process. Fix these issues by correcting this byte offset calculation on the page. Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/recovery.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 0955b657938ff..a9b8d77c8c1d5 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, - struct page *page) + loff_t pos, struct page *page) { struct buffer_head *bh_org; + size_t from = pos & ~PAGE_MASK; void *kaddr; bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); @@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, return -EIO; kaddr = kmap_atomic(page); - memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); + memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); kunmap_atomic(kaddr); brelse(bh_org); return 0; @@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, page); if (unlikely(err)) goto failed_page; -- GitLab From 9cee7e8ef3e31ca25b40ca52b8585dc6935deff2 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 24 Jan 2024 10:00:22 +0000 Subject: [PATCH 0803/1405] mm: memcg: optimize parent iteration in memcg_rstat_updated() In memcg_rstat_updated(), we iterate the memcg being updated and its parents to update memcg->vmstats_percpu->stats_updates in the fast path (i.e. no atomic updates). According to my math, this is 3 memory loads (and potentially 3 cache misses) per memcg: - Load the address of memcg->vmstats_percpu. - Load vmstats_percpu->stats_updates (based on some percpu calculation). - Load the address of the parent memcg. Avoid most of the cache misses by caching a pointer from each struct memcg_vmstats_percpu to its parent on the corresponding CPU. In this case, for the first memcg we have 2 memory loads (same as above): - Load the address of memcg->vmstats_percpu. - Load vmstats_percpu->stats_updates (based on some percpu calculation). Then for each additional memcg, we need a single load to get the parent's stats_updates directly. This reduces the number of loads from O(3N) to O(2+N) -- where N is the number of memcgs we need to iterate. Additionally, stash a pointer to memcg->vmstats in each struct memcg_vmstats_percpu such that we can access the atomic counter that all CPUs fold into, memcg->vmstats->stats_updates. memcg_should_flush_stats() is changed to memcg_vmstats_needs_flush() to accept a struct memcg_vmstats pointer accordingly. In struct memcg_vmstats_percpu, make sure both pointers together with stats_updates live on the same cacheline. Finally, update mem_cgroup_alloc() to take in a parent pointer and initialize the new cache pointers on each CPU. The percpu loop in mem_cgroup_alloc() may look concerning, but there are multiple similar loops in the cgroup creation path (e.g. cgroup_rstat_init()), most of which are hidden within alloc_percpu(). According to Oliver's testing [1], this fixes multiple 30-38% regressions in vm-scalability, will-it-scale-tlb_flush2, and will-it-scale-fallocate1. This comes at a cost of 2 more pointers per CPU (<2KB on a machine with 128 CPUs). [1] https://lore.kernel.org/lkml/ZbDJsfsZt2ITyo61@xsang-OptiPlex-9020/ [yosryahmed@google.com: fix struct memcg_vmstats_percpu size and alignment] Link: https://lkml.kernel.org/r/20240203044612.1234216-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20240124100023.660032-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Fixes: 8d59d2214c23 ("mm: memcg: make stats flushing threshold per-memcg") Tested-by: kernel test robot Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401221624.cb53a8ca-oliver.sang@intel.com Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Greg Thelen Signed-off-by: Andrew Morton --- mm/memcontrol.c | 56 ++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 46d8d02114cfe..1ed40f9d3a277 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -621,6 +621,15 @@ static inline int memcg_events_index(enum vm_event_item idx) } struct memcg_vmstats_percpu { + /* Stats updates since the last flush */ + unsigned int stats_updates; + + /* Cached pointers for fast iteration in memcg_rstat_updated() */ + struct memcg_vmstats_percpu *parent; + struct memcg_vmstats *vmstats; + + /* The above should fit a single cacheline for memcg_rstat_updated() */ + /* Local (CPU and cgroup) page state & events */ long state[MEMCG_NR_STAT]; unsigned long events[NR_MEMCG_EVENTS]; @@ -632,10 +641,7 @@ struct memcg_vmstats_percpu { /* Cgroup1: threshold notifications & softlimit tree updates */ unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; - - /* Stats updates since the last flush */ - unsigned int stats_updates; -}; +} ____cacheline_aligned; struct memcg_vmstats { /* Aggregated (CPU and subtree) page state & events */ @@ -698,36 +704,35 @@ static void memcg_stats_unlock(void) } -static bool memcg_should_flush_stats(struct mem_cgroup *memcg) +static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats) { - return atomic64_read(&memcg->vmstats->stats_updates) > + return atomic64_read(&vmstats->stats_updates) > MEMCG_CHARGE_BATCH * num_online_cpus(); } static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) { + struct memcg_vmstats_percpu *statc; int cpu = smp_processor_id(); - unsigned int x; if (!val) return; cgroup_rstat_updated(memcg->css.cgroup, cpu); - - for (; memcg; memcg = parent_mem_cgroup(memcg)) { - x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates, - abs(val)); - - if (x < MEMCG_CHARGE_BATCH) + statc = this_cpu_ptr(memcg->vmstats_percpu); + for (; statc; statc = statc->parent) { + statc->stats_updates += abs(val); + if (statc->stats_updates < MEMCG_CHARGE_BATCH) continue; /* * If @memcg is already flush-able, increasing stats_updates is * redundant. Avoid the overhead of the atomic update. */ - if (!memcg_should_flush_stats(memcg)) - atomic64_add(x, &memcg->vmstats->stats_updates); - __this_cpu_write(memcg->vmstats_percpu->stats_updates, 0); + if (!memcg_vmstats_needs_flush(statc->vmstats)) + atomic64_add(statc->stats_updates, + &statc->vmstats->stats_updates); + statc->stats_updates = 0; } } @@ -756,7 +761,7 @@ void mem_cgroup_flush_stats(struct mem_cgroup *memcg) if (!memcg) memcg = root_mem_cgroup; - if (memcg_should_flush_stats(memcg)) + if (memcg_vmstats_needs_flush(memcg->vmstats)) do_flush_stats(memcg); } @@ -770,7 +775,7 @@ void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) static void flush_memcg_stats_dwork(struct work_struct *w) { /* - * Deliberately ignore memcg_should_flush_stats() here so that flushing + * Deliberately ignore memcg_vmstats_needs_flush() here so that flushing * in latency-sensitive paths is as cheap as possible. */ do_flush_stats(root_mem_cgroup); @@ -5477,10 +5482,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) __mem_cgroup_free(memcg); } -static struct mem_cgroup *mem_cgroup_alloc(void) +static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) { + struct memcg_vmstats_percpu *statc, *pstatc; struct mem_cgroup *memcg; - int node; + int node, cpu; int __maybe_unused i; long error = -ENOMEM; @@ -5504,6 +5510,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (!memcg->vmstats_percpu) goto fail; + for_each_possible_cpu(cpu) { + if (parent) + pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu); + statc = per_cpu_ptr(memcg->vmstats_percpu, cpu); + statc->parent = parent ? pstatc : NULL; + statc->vmstats = memcg->vmstats; + } + for_each_node(node) if (alloc_mem_cgroup_per_node_info(memcg, node)) goto fail; @@ -5549,7 +5563,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) struct mem_cgroup *memcg, *old_memcg; old_memcg = set_active_memcg(parent); - memcg = mem_cgroup_alloc(); + memcg = mem_cgroup_alloc(parent); set_active_memcg(old_memcg); if (IS_ERR(memcg)) return ERR_CAST(memcg); -- GitLab From 2fde9e7f9e6dc38e1d7091b9705c22be945c8697 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 24 Jan 2024 16:40:14 +0800 Subject: [PATCH 0804/1405] mm/memory-failure: fix crash in split_huge_page_to_list from soft_offline_page When I did soft offline stress test, a machine was observed to crash with the following message: kernel BUG at include/linux/memcontrol.h:554! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 5 PID: 3837 Comm: hwpoison.sh Not tainted 6.7.0-next-20240112-00001-g8ecf3e7fb7c8-dirty #97 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:folio_memcg+0xaf/0xd0 Code: 10 5b 5d c3 cc cc cc cc 48 c7 c6 08 b1 f2 b2 48 89 ef e8 b4 c5 f8 ff 90 0f 0b 48 c7 c6 d0 b0 f2 b2 48 89 ef e8 a2 c5 f8 ff 90 <0f> 0b 48 c7 c6 08 b1 f2 b2 48 89 ef e8 90 c5 f8 ff 90 0f 0b 66 66 RSP: 0018:ffffb6c043657c98 EFLAGS: 00000296 RAX: 000000000000004b RBX: ffff932bc1d1e401 RCX: ffff933abfb5c908 RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff933abfb5c900 RBP: ffffea6f04019080 R08: ffffffffb3338ce8 R09: 0000000000009ffb R10: 00000000000004dd R11: ffffffffb3308d00 R12: ffffea6f04019080 R13: ffffea6f04019080 R14: 0000000000000001 R15: ffffb6c043657da0 FS: 00007f6c60f6b740(0000) GS:ffff933abfb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559c3bc8b980 CR3: 0000000107f1c000 CR4: 00000000000006f0 Call Trace: split_huge_page_to_list+0x4d/0x1380 try_to_split_thp_page+0x3a/0xf0 soft_offline_page+0x1ea/0x8a0 soft_offline_page_store+0x52/0x90 kernfs_fop_write_iter+0x118/0x1b0 vfs_write+0x30b/0x430 ksys_write+0x5e/0xe0 do_syscall_64+0xb0/0x1b0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 RIP: 0033:0x7f6c60d14697 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffe9b72b8d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f6c60d14697 RDX: 000000000000000c RSI: 0000559c3bc8b980 RDI: 0000000000000001 RBP: 0000559c3bc8b980 R08: 00007f6c60dd1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c R13: 00007f6c60e1a780 R14: 00007f6c60e16600 R15: 00007f6c60e15a00 The problem is that page->mapping is overloaded with slab->slab_list or slabs fields now, so slab pages could be taken as non-LRU movable pages if field slabs contains PAGE_MAPPING_MOVABLE or slab_list->prev is set to LIST_POISON2. These slab pages will be treated as thp later leading to crash in split_huge_page_to_list(). Link: https://lkml.kernel.org/r/20240126065837.2100184-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20240124084014.1772906-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Fixes: 130d4df57390 ("mm/sl[au]b: rearrange struct slab fields to allow larger rcu_head") Reviewed-by: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- mm/memory-failure.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 636280d04008d..9349948f1abfd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1377,6 +1377,9 @@ void ClearPageHWPoisonTakenOff(struct page *page) */ static inline bool HWPoisonHandlable(struct page *page, unsigned long flags) { + if (PageSlab(page)) + return false; + /* Soft offline could migrate non-LRU movable pages */ if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page)) return true; -- GitLab From 01c1484ac04790fe27a37f89dd3a350f99646815 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 24 Oct 2023 20:51:25 +0500 Subject: [PATCH 0805/1405] selftests: core: include linux/close_range.h for CLOSE_RANGE_* macros Correct header file is needed for getting CLOSE_RANGE_* macros. Previously it was tested with newer glibc which didn't show the need to include the header which was a mistake. Link: https://lkml.kernel.org/r/20231024155137.219700-1-usama.anjum@collabora.com Fixes: ec54424923cf ("selftests: core: remove duplicate defines") Reported-by: Aishwarya TCV Link: https://lore.kernel.org/all/7161219e-0223-d699-d6f3-81abd9abf13b@arm.com Signed-off-by: Muhammad Usama Anjum Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/core/close_range_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 534576f06df1c..c59e4adb905df 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "../kselftest_harness.h" #include "../clone3/clone3_selftests.h" -- GitLab From e870920bbe68e52335a4c31a059e6af6a9a59dbb Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 22 Jan 2024 22:43:05 -0800 Subject: [PATCH 0806/1405] arch/arm/mm: fix major fault accounting when retrying under per-VMA lock The change [1] missed ARM architecture when fixing major fault accounting for page fault retry under per-VMA lock. The user-visible effects is that it restores correct major fault accounting that was broken after [2] was merged in 6.7 kernel. The more detailed description is in [3] and this patch simply adds the same fix to ARM architecture which I missed in [3]. Add missing code to fix ARM architecture fault accounting. [1] 46e714c729c8 ("arch/mm/fault: fix major fault accounting when retrying under per-VMA lock") [2] https://lore.kernel.org/all/20231006195318.4087158-6-willy@infradead.org/ [3] https://lore.kernel.org/all/20231226214610.109282-1-surenb@google.com/ Link: https://lkml.kernel.org/r/20240123064305.2829244-1-surenb@google.com Fixes: 12214eba1992 ("mm: handle read faults under the VMA lock") Reported-by: Russell King (Oracle) Signed-off-by: Suren Baghdasaryan Cc: Alexander Gordeev Cc: Andy Lutomirski Cc: Catalin Marinas Cc: Christophe Leroy Cc: Dave Hansen Cc: Gerald Schaefer Cc: Matthew Wilcox (Oracle) Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Peter Zijlstra Cc: Will Deacon Cc: Signed-off-by: Andrew Morton --- arch/arm/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index e96fb40b9cc32..07565b593ed68 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -298,6 +298,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto done; } count_vm_vma_lock_event(VMA_LOCK_RETRY); + if (fault & VM_FAULT_MAJOR) + flags |= FAULT_FLAG_TRIED; /* Quick path to respond to signals */ if (fault_signal_pending(fault, regs)) { -- GitLab From 4c2da3188b848d33c26d7f0f8b14f3150331c923 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 26 Jan 2024 12:25:48 +0900 Subject: [PATCH 0807/1405] mm/madvise: don't forget to leave lazy MMU mode in madvise_cold_or_pageout_pte_range() We need to leave lazy MMU mode before unlocking. Link: https://lkml.kernel.org/r/20240126032608.355899-1-senozhatsky@chromium.org Fixes: b2f557a21bc8 ("mm/madvise: add cond_resched() in madvise_cold_or_pageout_pte_range()") Signed-off-by: Sergey Senozhatsky Cc: Jiexun Wang Signed-off-by: Andrew Morton --- mm/madvise.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/madvise.c b/mm/madvise.c index 912155a94ed58..cfa5e72882611 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -429,6 +429,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (++batch_count == SWAP_CLUSTER_MAX) { batch_count = 0; if (need_resched()) { + arch_leave_lazy_mmu_mode(); pte_unmap_unlock(start_pte, ptl); cond_resched(); goto restart; -- GitLab From 2e601e1e8e4b330020a346c55ba111d49e0b188e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 29 Jan 2024 20:34:38 -0500 Subject: [PATCH 0808/1405] mm: zswap: fix objcg use-after-free in entry destruction In the per-memcg LRU universe, LRU removal uses entry->objcg to determine which list count needs to be decreased. Drop the objcg reference after updating the LRU, to fix a possible use-after-free. Link: https://lkml.kernel.org/r/20240130013438.565167-1-hannes@cmpxchg.org Fixes: a65b0e7607cc ("zswap: make shrinking memcg-aware") Signed-off-by: Johannes Weiner Acked-by: Yosry Ahmed Reviewed-by: Nhat Pham Reviewed-by: Chengming Zhou Signed-off-by: Andrew Morton --- mm/zswap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index ca25b676048ea..0a94b197ed32e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -536,10 +536,6 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry) */ static void zswap_free_entry(struct zswap_entry *entry) { - if (entry->objcg) { - obj_cgroup_uncharge_zswap(entry->objcg, entry->length); - obj_cgroup_put(entry->objcg); - } if (!entry->length) atomic_dec(&zswap_same_filled_pages); else { @@ -548,6 +544,10 @@ static void zswap_free_entry(struct zswap_entry *entry) atomic_dec(&entry->pool->nr_stored); zswap_pool_put(entry->pool); } + if (entry->objcg) { + obj_cgroup_uncharge_zswap(entry->objcg, entry->length); + obj_cgroup_put(entry->objcg); + } zswap_entry_cache_free(entry); atomic_dec(&zswap_stored_pages); zswap_update_total_size(); -- GitLab From f2076032096775d1bb1af16b6eddbc6534575328 Mon Sep 17 00:00:00 2001 From: John Moon Date: Wed, 31 Jan 2024 03:43:18 +0000 Subject: [PATCH 0809/1405] mailmap: switch email address for John Moon Add current email address as QUIC email is no longer active. Link: https://lkml.kernel.org/r/20240131034311.46706-1-john@jmoon.dev Signed-off-by: John Moon Acked-by: Trilok Soni Cc: Elliot Berman Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 04998f7bda818..8ae00bd3708a7 100644 --- a/.mailmap +++ b/.mailmap @@ -289,6 +289,7 @@ Johan Hovold John Crispin John Fastabend John Keeping +John Moon John Paul Adrian Glaubitz John Stultz -- GitLab From 79d72c68c58784a3e1cd2378669d51bfd0cb7498 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 30 Jan 2024 22:04:18 +0100 Subject: [PATCH 0810/1405] fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super When configuring a hugetlb filesystem via the fsconfig() syscall, there is a possible NULL dereference in hugetlbfs_fill_super() caused by assigning NULL to ctx->hstate in hugetlbfs_parse_param() when the requested pagesize is non valid. E.g: Taking the following steps: fd = fsopen("hugetlbfs", FSOPEN_CLOEXEC); fsconfig(fd, FSCONFIG_SET_STRING, "pagesize", "1024", 0); fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); Given that the requested "pagesize" is invalid, ctxt->hstate will be replaced with NULL, losing its previous value, and we will print an error: ... ... case Opt_pagesize: ps = memparse(param->string, &rest); ctx->hstate = h; if (!ctx->hstate) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } return 0; ... ... This is a problem because later on, we will dereference ctxt->hstate in hugetlbfs_fill_super() ... ... sb->s_blocksize = huge_page_size(ctx->hstate); ... ... Causing below Oops. Fix this by replacing cxt->hstate value only when then pagesize is known to be valid. kernel: hugetlbfs: Unsupported page size 0 MB kernel: BUG: kernel NULL pointer dereference, address: 0000000000000028 kernel: #PF: supervisor read access in kernel mode kernel: #PF: error_code(0x0000) - not-present page kernel: PGD 800000010f66c067 P4D 800000010f66c067 PUD 1b22f8067 PMD 0 kernel: Oops: 0000 [#1] PREEMPT SMP PTI kernel: CPU: 4 PID: 5659 Comm: syscall Tainted: G E 6.8.0-rc2-default+ #22 5a47c3fef76212addcc6eb71344aabc35190ae8f kernel: Hardware name: Intel Corp. GROVEPORT/GROVEPORT, BIOS GVPRCRB1.86B.0016.D04.1705030402 05/03/2017 kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 kernel: Call Trace: kernel: kernel: ? __die_body+0x1a/0x60 kernel: ? page_fault_oops+0x16f/0x4a0 kernel: ? search_bpf_extables+0x65/0x70 kernel: ? fixup_exception+0x22/0x310 kernel: ? exc_page_fault+0x69/0x150 kernel: ? asm_exc_page_fault+0x22/0x30 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: ? hugetlbfs_fill_super+0xb4/0x1a0 kernel: ? hugetlbfs_fill_super+0x28/0x1a0 kernel: ? __pfx_hugetlbfs_fill_super+0x10/0x10 kernel: vfs_get_super+0x40/0xa0 kernel: ? __pfx_bpf_lsm_capable+0x10/0x10 kernel: vfs_get_tree+0x25/0xd0 kernel: vfs_cmd_create+0x64/0xe0 kernel: __x64_sys_fsconfig+0x395/0x410 kernel: do_syscall_64+0x80/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? syscall_exit_to_user_mode+0x82/0x240 kernel: ? do_syscall_64+0x8d/0x160 kernel: ? exc_page_fault+0x69/0x150 kernel: entry_SYSCALL_64_after_hwframe+0x6e/0x76 kernel: RIP: 0033:0x7ffbc0cb87c9 kernel: Code: 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 97 96 0d 00 f7 d8 64 89 01 48 kernel: RSP: 002b:00007ffc29d2f388 EFLAGS: 00000206 ORIG_RAX: 00000000000001af kernel: RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffbc0cb87c9 kernel: RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 kernel: RBP: 00007ffc29d2f3b0 R08: 0000000000000000 R09: 0000000000000000 kernel: R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 kernel: R13: 00007ffc29d2f4c0 R14: 0000000000000000 R15: 0000000000000000 kernel: kernel: Modules linked in: rpcsec_gss_krb5(E) auth_rpcgss(E) nfsv4(E) dns_resolver(E) nfs(E) lockd(E) grace(E) sunrpc(E) netfs(E) af_packet(E) bridge(E) stp(E) llc(E) iscsi_ibft(E) iscsi_boot_sysfs(E) intel_rapl_msr(E) intel_rapl_common(E) iTCO_wdt(E) intel_pmc_bxt(E) sb_edac(E) iTCO_vendor_support(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) rfkill(E) ipmi_ssif(E) kvm(E) acpi_ipmi(E) irqbypass(E) pcspkr(E) igb(E) ipmi_si(E) mei_me(E) i2c_i801(E) joydev(E) intel_pch_thermal(E) i2c_smbus(E) dca(E) lpc_ich(E) mei(E) ipmi_devintf(E) ipmi_msghandler(E) acpi_pad(E) tiny_power_button(E) button(E) fuse(E) efi_pstore(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) sd_mod(E) t10_pi(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) polyval_clmulni(E) ahci(E) xhci_pci(E) polyval_generic(E) gf128mul(E) ghash_clmulni_intel(E) sha512_ssse3(E) sha256_ssse3(E) xhci_pci_renesas(E) libahci(E) ehci_pci(E) sha1_ssse3(E) xhci_hcd(E) ehci_hcd(E) libata(E) kernel: mgag200(E) i2c_algo_bit(E) usbcore(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) aesni_intel(E) crypto_simd(E) cryptd(E) kernel: Unloaded tainted modules: acpi_cpufreq(E):1 fjes(E):1 kernel: CR2: 0000000000000028 kernel: ---[ end trace 0000000000000000 ]--- kernel: RIP: 0010:hugetlbfs_fill_super+0xb4/0x1a0 kernel: Code: 48 8b 3b e8 3e c6 ed ff 48 85 c0 48 89 45 20 0f 84 d6 00 00 00 48 b8 ff ff ff ff ff ff ff 7f 4c 89 e7 49 89 44 24 20 48 8b 03 <8b> 48 28 b8 00 10 00 00 48 d3 e0 49 89 44 24 18 48 8b 03 8b 40 28 kernel: RSP: 0018:ffffbe9960fcbd48 EFLAGS: 00010246 kernel: RAX: 0000000000000000 RBX: ffff9af5272ae780 RCX: 0000000000372004 kernel: RDX: ffffffffffffffff RSI: ffffffffffffffff RDI: ffff9af555e9b000 kernel: RBP: ffff9af52ee66b00 R08: 0000000000000040 R09: 0000000000370004 kernel: R10: ffffbe9960fcbd48 R11: 0000000000000040 R12: ffff9af555e9b000 kernel: R13: ffffffffa66b86c0 R14: ffff9af507d2f400 R15: ffff9af507d2f400 kernel: FS: 00007ffbc0ba4740(0000) GS:ffff9b0bd7000000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 0000000000000028 CR3: 00000001b1ee0000 CR4: 00000000001506f0 Link: https://lkml.kernel.org/r/20240130210418.3771-1-osalvador@suse.de Fixes: 32021982a324 ("hugetlbfs: Convert to fs_context") Signed-off-by: Michal Hocko Signed-off-by: Oscar Salvador Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ee13c2ca8ad29..d746866ae3b6b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1365,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1409,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: -- GitLab From 27d3969b47cc38810b3fd65d72231940e8671e6c Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Sun, 28 Jan 2024 13:28:49 +0000 Subject: [PATCH 0811/1405] mm/zswap: don't return LRU_SKIP if we have dropped lru lock LRU_SKIP can only be returned if we don't ever dropped lru lock, or we need to return LRU_RETRY to restart from the head of lru list. Otherwise, the iteration might continue from a cursor position that was freed while the locks were dropped. Actually we may need to introduce another LRU_STOP to really terminate the ongoing shrinking scan process, when we encounter a warm page already in the swap cache. The current list_lru implementation doesn't have this function to early break from __list_lru_walk_one. Link: https://lkml.kernel.org/r/20240126-zswap-writeback-race-v2-1-b10479847099@bytedance.com Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") Signed-off-by: Chengming Zhou Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Chris Li Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/zswap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 0a94b197ed32e..350dd2fc81599 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -895,10 +895,8 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o * into the warmer region. We should terminate shrinking (if we're in the dynamic * shrinker context). */ - if (writeback_result == -EEXIST && encountered_page_in_swapcache) { - ret = LRU_SKIP; + if (writeback_result == -EEXIST && encountered_page_in_swapcache) *encountered_page_in_swapcache = true; - } goto put_unlock; } -- GitLab From 6f1f15a5e492082c8082bea56d87852b65588b5c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Thu, 1 Feb 2024 10:10:08 +0800 Subject: [PATCH 0812/1405] MAINTAINERS: Leo Yan has moved I will lose access to my @linaro.org email address next week, update the MAINTAINERS file and map it in .mailmap with the new email address. Link: https://lkml.kernel.org/r/20240201021022.886-1-leo.yan@linux.dev Signed-off-by: Leo Yan Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: James Clark Cc: Mike Leach Cc: Namhyung Kim Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 8ae00bd3708a7..ee8f03cc7f726 100644 --- a/.mailmap +++ b/.mailmap @@ -345,6 +345,7 @@ Leonid I Ananiev Leon Romanovsky Leon Romanovsky Leon Romanovsky +Leo Yan Liam Mark Linas Vepstas Linus Lüssing diff --git a/MAINTAINERS b/MAINTAINERS index 960512bec4288..95b47e3cee0aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17183,7 +17183,7 @@ R: John Garry R: Will Deacon R: James Clark R: Mike Leach -R: Leo Yan +R: Leo Yan L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: tools/build/feature/test-libopencsd.c -- GitLab From 38296afe3c6ee07319e01bb249aa4bb47c07b534 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 31 Jan 2024 23:56:57 +0900 Subject: [PATCH 0813/1405] nilfs2: fix hang in nilfs_lookup_dirty_data_buffers() Syzbot reported a hang issue in migrate_pages_batch() called by mbind() and nilfs_lookup_dirty_data_buffers() called in the log writer of nilfs2. While migrate_pages_batch() locks a folio and waits for the writeback to complete, the log writer thread that should bring the writeback to completion picks up the folio being written back in nilfs_lookup_dirty_data_buffers() that it calls for subsequent log creation and was trying to lock the folio. Thus causing a deadlock. In the first place, it is unexpected that folios/pages in the middle of writeback will be updated and become dirty. Nilfs2 adds a checksum to verify the validity of the log being written and uses it for recovery at mount, so data changes during writeback are suppressed. Since this is broken, an unclean shutdown could potentially cause recovery to fail. Investigation revealed that the root cause is that the wait for writeback completion in nilfs_page_mkwrite() is conditional, and if the backing device does not require stable writes, data may be modified without waiting. Fix these issues by making nilfs_page_mkwrite() wait for writeback to finish regardless of the stable write requirement of the backing device. Link: https://lkml.kernel.org/r/20240131145657.4209-1-konishi.ryusuke@gmail.com Fixes: 1d1d1a767206 ("mm: only enforce stable page writes if the backing device requires it") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ee2ae68da3b22d04cd8d@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000047d819061004ad6c@google.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index bec33b89a0758..0e3fc5ba33c73 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) nilfs_transaction_commit(inode->i_sb); mapped: - folio_wait_stable(folio); + /* + * Since checksumming including data blocks is performed to determine + * the validity of the log to be written and used for recovery, it is + * necessary to wait for writeback to finish here, regardless of the + * stable write requirement of the backing device. + */ + folio_wait_writeback(folio); out: sb_end_pagefault(inode->i_sb); return vmf_fs_error(ret); -- GitLab From b9e4bc1046d20e0623a80660ef8627448056f817 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 2 Feb 2024 11:19:56 -0800 Subject: [PATCH 0814/1405] mm/damon/sysfs-schemes: fix wrong DAMOS tried regions update timeout setup DAMON sysfs interface's update_schemes_tried_regions command has a timeout of two apply intervals of the DAMOS scheme. Having zero value DAMOS scheme apply interval means it will use the aggregation interval as the value. However, the timeout setup logic is mistakenly using the sampling interval insted of the aggregartion interval for the case. This could cause earlier-than-expected timeout of the command. Fix it. Link: https://lkml.kernel.org/r/20240202191956.88791-1-sj@kernel.org Fixes: 7d6fa31a2fd7 ("mm/damon/sysfs-schemes: add timeout for update_schemes_tried_regions") Signed-off-by: SeongJae Park Cc: # 6.7.x Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index 8dbaac6e5c2d0..dd2fb51270092 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -2194,7 +2194,7 @@ static void damos_tried_regions_init_upd_status( sysfs_regions->upd_timeout_jiffies = jiffies + 2 * usecs_to_jiffies(scheme->apply_interval_us ? scheme->apply_interval_us : - ctx->attrs.sample_interval); + ctx->attrs.aggr_interval); } } -- GitLab From 5bc09b397cbf1221f8a8aacb1152650c9195b02b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 4 Feb 2024 01:16:45 +0900 Subject: [PATCH 0815/1405] nilfs2: fix potential bug in end_buffer_async_write According to a syzbot report, end_buffer_async_write(), which handles the completion of block device writes, may detect abnormal condition of the buffer async_write flag and cause a BUG_ON failure when using nilfs2. Nilfs2 itself does not use end_buffer_async_write(). But, the async_write flag is now used as a marker by commit 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") as a means of resolving double list insertion of dirty blocks in nilfs_lookup_dirty_data_buffers() and nilfs_lookup_node_buffers() and the resulting crash. This modification is safe as long as it is used for file data and b-tree node blocks where the page caches are independent. However, it was irrelevant and redundant to also introduce async_write for segment summary and super root blocks that share buffers with the backing device. This led to the possibility that the BUG_ON check in end_buffer_async_write would fail as described above, if independent writebacks of the backing device occurred in parallel. The use of async_write for segment summary buffers has already been removed in a previous change. Fix this issue by removing the manipulation of the async_write flag for the remaining super root block buffer. Link: https://lkml.kernel.org/r/20240203161645.4992-1-konishi.ryusuke@gmail.com Fixes: 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+5c04210f7c7f897c1e7f@syzkaller.appspotmail.com Closes: https://lkml.kernel.org/r/00000000000019a97c05fd42f8c8@google.com Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2590a0860eab0..2bfb08052d399 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_folio != bd_folio) { folio_lock(bd_folio); @@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } break; } + set_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_begin_folio_io(fs_folio); fs_folio = bh->b_folio; @@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); if (bh->b_folio != bd_folio) { @@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) } break; } + clear_buffer_async_write(bh); if (bh->b_folio != fs_folio) { nilfs_end_folio_io(fs_folio, err); fs_folio = bh->b_folio; @@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) BIT(BH_Delay) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Redirected)); - set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh == segbuf->sb_super_root) { + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); if (bh->b_folio != bd_folio) { folio_end_writeback(bd_folio); bd_folio = bh->b_folio; @@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) update_sr = true; break; } + set_mask_bits(&bh->b_state, clear_bits, set_bits); if (bh->b_folio != fs_folio) { nilfs_end_folio_io(fs_folio, 0); fs_folio = bh->b_folio; -- GitLab From 855678ed8534518e2b428bcbcec695de9ba248e8 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:51 +0800 Subject: [PATCH 0816/1405] md: Fix missing release of 'active_io' for flush submit_flushes atomic_set(&mddev->flush_pending, 1); rdev_for_each_rcu(rdev, mddev) atomic_inc(&mddev->flush_pending); bi->bi_end_io = md_end_flush submit_bio(bi); /* flush io is done first */ md_end_flush if (atomic_dec_and_test(&mddev->flush_pending)) percpu_ref_put(&mddev->active_io) -> active_io is not released if (atomic_dec_and_test(&mddev->flush_pending)) -> missing release of active_io For consequence, mddev_suspend() will wait for 'active_io' to be zero forever. Fix this problem by releasing 'active_io' in submit_flushes() if 'flush_pending' is decreased to zero. Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Blazej Kucman Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@linux.intel.com/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-7-yukuai1@huaweicloud.com --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2266358d80746..e52896c140bdc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) -- GitLab From ddc7d4c584704666fe7088bbd9ec2d72d0f63e65 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Feb 2024 09:55:32 -0800 Subject: [PATCH 0817/1405] drm/xe: Fix loop in vm_bind_ioctl_ops_unwind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic for the unwind loop is incorrect resulting in an infinite loop. Fix to unwind to go from the last operations list to he first. Fixes: 617eebb9c480 ("drm/xe: Fix array of binds") Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240201175532.2303168-1-matthew.brost@intel.com (cherry picked from commit 3acc1ff1a72fce00cdbd3ef1c27108a967fd5616) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 30db264d34a32..d45cbf75d2037 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2669,7 +2669,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, { int i; - for (i = num_ops_list - 1; i; ++i) { + for (i = num_ops_list - 1; i >= 0; --i) { struct drm_gpuva_ops *__ops = ops[i]; struct drm_gpuva_op *__op; -- GitLab From fc29b6d5ab5395dcb9f35de71e0347f3a6bca542 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 31 Jan 2024 16:48:48 -0800 Subject: [PATCH 0818/1405] drm/xe: Take a reference in xe_exec_queue_last_fence_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a reference in xe_exec_queue_last_fence_get(). Also fix a reference counting underflow bug VM bind and unbind. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240201004849.2219558-2-matthew.brost@intel.com (cherry picked from commit a856b67a84169e065ebbeee50258936b1eacc9eb) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec_queue.c | 8 ++++++-- drivers/gpu/drm/xe/xe_migrate.c | 5 ++++- drivers/gpu/drm/xe/xe_sched_job.c | 1 - drivers/gpu/drm/xe/xe_sync.c | 2 -- drivers/gpu/drm/xe/xe_vm.c | 1 + 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index bcfc4127c7c59..254b1d3af4cb5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -926,20 +926,24 @@ void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) * @q: The exec queue * @vm: The VM the engine does a bind or exec for * - * Get last fence, does not take a ref + * Get last fence, takes a ref * * Returns: last fence if not signaled, dma fence stub if signaled */ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, struct xe_vm *vm) { + struct dma_fence *fence; + xe_exec_queue_last_fence_lockdep_assert(q, vm); if (q->last_fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) xe_exec_queue_last_fence_put(q, vm); - return q->last_fence ? q->last_fence : dma_fence_get_stub(); + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; } /** diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 5c6c546242521..0cc31837ef26b 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1204,8 +1204,11 @@ static bool no_in_syncs(struct xe_vm *vm, struct xe_exec_queue *q, } if (q) { fence = xe_exec_queue_last_fence_get(q, vm); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + dma_fence_put(fence); return false; + } + dma_fence_put(fence); } return true; diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 01106a1156ad8..4e2ccad0e52fa 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -274,7 +274,6 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) struct dma_fence *fence; fence = xe_exec_queue_last_fence_get(job->q, vm); - dma_fence_get(fence); return drm_sched_job_add_dependency(&job->drm, fence); } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index e4c220cf9115e..aab92bee1d7cf 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -307,7 +307,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, /* Easy case... */ if (!num_in_fence) { fence = xe_exec_queue_last_fence_get(q, vm); - dma_fence_get(fence); return fence; } @@ -322,7 +321,6 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, } } fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm); - dma_fence_get(fences[current_fence - 1]); cf = dma_fence_array_create(num_in_fence, fences, vm->composite_fence_ctx, vm->composite_fence_seqno++, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index d45cbf75d2037..96b1833c0e8c6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1984,6 +1984,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, xe_exec_queue_last_fence_get(wait_exec_queue, vm); xe_sync_entry_signal(&syncs[i], NULL, fence); + dma_fence_put(fence); } } -- GitLab From 21abf108a062fa0323077b5ba3d26e2c0bba9232 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 31 Jan 2024 16:48:49 -0800 Subject: [PATCH 0819/1405] drm/xe: Pick correct userptr VMA to repin on REMAP op failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A REMAP op is composed of 3 VMA's - unmap, prev map, and next map. When op_execute fails with -EAGAIN we need to update the local VMA pointer to the current op state and then repin the VMA if it is a userptr. Fixes a failure seen in xe_vm.munmap-style-unbind-userptr-one-partial. Fixes: b06d47be7c83 ("drm/xe: Port Xe to GPUVA") Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240201004849.2219558-3-matthew.brost@intel.com (cherry picked from commit 447f74d223b4f6cbab74963bf1099050c15374ce) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 96b1833c0e8c6..ca11f83811155 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2531,13 +2531,25 @@ static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma, } drm_exec_fini(&exec); - if (err == -EAGAIN && xe_vma_is_userptr(vma)) { + if (err == -EAGAIN) { lockdep_assert_held_write(&vm->lock); - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - if (!err) - goto retry_userptr; - trace_xe_vma_fail(vma); + if (op->base.op == DRM_GPUVA_OP_REMAP) { + if (!op->remap.unmap_done) + vma = gpuva_to_vma(op->base.remap.unmap->va); + else if (op->remap.prev) + vma = op->remap.prev; + else + vma = op->remap.next; + } + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (!err) + goto retry_userptr; + + trace_xe_vma_fail(vma); + } } return err; -- GitLab From 90773aaf9129ea6f47915bd3c47da261abe6a447 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 3 Jan 2024 12:48:02 +0100 Subject: [PATCH 0820/1405] drm/xe: circumvent bogus stringop-overflow warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-13 warns about an array overflow that it sees but that is prevented by the "asid % NUM_PF_QUEUE" calculation: drivers/gpu/drm/xe/xe_gt_pagefault.c: In function 'xe_guc_pagefault_handler': include/linux/fortify-string.h:57:33: error: writing 16 bytes into a region of size 0 [-Werror=stringop-overflow=] include/linux/fortify-string.h:689:26: note: in expansion of macro '__fortify_memcpy_chk' 689 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ | ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/xe/xe_gt_pagefault.c:341:17: note: in expansion of macro 'memcpy' 341 | memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); | ^~~~~~ drivers/gpu/drm/xe/xe_gt_types.h:102:25: note: at offset [1144, 265324] into destination object 'tile' of size 8 I found that rewriting the assignment using pointer addition rather than the equivalent array index calculation prevents the warning, so use that instead. I sent a bug report against gcc for the false positive warning. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113214 Signed-off-by: Arnd Bergmann Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240103114819.2913937-1-arnd@kernel.org (cherry picked from commit 774ef5dfc95578a9079426d5106076dcd59c4dfa) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9c2fe1697d6ee..73f08f1924df2 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -335,7 +335,7 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) return -EPROTO; asid = FIELD_GET(PFD_ASID, msg[1]); - pf_queue = >->usm.pf_queue[asid % NUM_PF_QUEUE]; + pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); -- GitLab From 3aa3c5c249086ffc920e8f6d6a15bdd441153d45 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Feb 2024 19:34:40 -0800 Subject: [PATCH 0821/1405] drm/xe: Map both mem.kernel_bb_pool and usm.bb_pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For integrated devices we need to map both mem.kernel_bb_pool and usm.bb_pool to be able to run batches from both pools. Fixes: a682b6a42d4d ("drm/xe: Support device page faults on integrated platforms") Tested-by: Brian Welty Signed-off-by: Matthew Brost Reviewed-by: Brian Welty Link: https://patchwork.freedesktop.org/patch/msgid/20240202033440.2351862-1-matthew.brost@intel.com (cherry picked from commit 72f86ed3c88933d6fa09b036de93621ea71097a7) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt.c | 5 ++++- drivers/gpu/drm/xe/xe_migrate.c | 23 ++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3af2adec12956..35474ddbaf97e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -437,7 +437,10 @@ static int all_fw_domain_init(struct xe_gt *gt) * USM has its only SA pool to non-block behind user operations */ if (gt_to_xe(gt)->info.has_usm) { - gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), SZ_1M, 16); + struct xe_device *xe = gt_to_xe(gt); + + gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), + IS_DGFX(xe) ? SZ_1M : SZ_512K, 16); if (IS_ERR(gt->usm.bb_pool)) { err = PTR_ERR(gt->usm.bb_pool); goto err_force_wake; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0cc31837ef26b..70480c3056021 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -170,11 +170,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - if (xe->info.has_usm) { - batch = tile->primary_gt->usm.bb_pool->bo; - m->usm_batch_base_ofs = m->batch_base_ofs; - } - for (i = 0; i < batch->size; i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { @@ -185,6 +180,24 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, entry); level++; } + if (xe->info.has_usm) { + xe_tile_assert(tile, batch->size == SZ_1M); + + batch = tile->primary_gt->usm.bb_pool->bo; + m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; + xe_tile_assert(tile, batch->size == SZ_512K); + + for (i = 0; i < batch->size; + i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : + XE_PAGE_SIZE) { + entry = vm->pt_ops->pte_encode_bo(batch, i, + pat_index, 0); + + xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, + entry); + level++; + } + } } else { u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE); -- GitLab From 11572b3f68d9933fef5c1afef4c20041701d8025 Mon Sep 17 00:00:00 2001 From: Xiaoming Wang Date: Fri, 2 Feb 2024 13:56:58 -0800 Subject: [PATCH 0822/1405] drm/xe/display: Fix memleak in display initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_power_domains_init is called twice in xe_device_probe: 1) intel_power_domains_init() xe_display_init_nommio() xe_device_probe() 2) intel_power_domains_init() intel_display_driver_probe_noirq() xe_display_init_noirq() xe_device_probe() It needs remove one to avoid power_domains->power_wells double malloc. unreferenced object 0xffff88811150ee00 (size 512): comm "systemd-udevd", pid 506, jiffies 4294674198 (age 3605.560s) hex dump (first 32 bytes): 10 b4 9d a0 ff ff ff ff ff ff ff ff ff ff ff ff ................ ff ff ff ff ff ff ff ff 00 00 00 00 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1c1/0x2b0 [] __kmalloc+0x52/0x150 [] __set_power_wells+0xc3/0x360 [xe] [] xe_display_init_nommio+0x4c/0x70 [xe] [] xe_device_probe+0x3c/0x5a0 [xe] [] xe_pci_probe+0x33f/0x5a0 [xe] [] local_pci_probe+0x47/0xa0 [] pci_device_probe+0xc3/0x1f0 [] really_probe+0x1a2/0x410 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1e/0x90 [] __driver_attach+0xda/0x1d0 [] bus_for_each_dev+0x7c/0xd0 [] bus_add_driver+0x119/0x220 [] driver_register+0x60/0x120 [] 0xffffffffa05e50a0 The call to intel_power_domains_cleanup() needs to stay where it is for now. The main issue is that while the init is called by the display side, shared by i915 and xe, the cleanup is called by a non-shared code path. Fixing that will be done as a separate commit. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Xiaoming Wang [ reword commit message and explain why the fini needs to stay where it is ] Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240202215658.561298-1-lucas.demarchi@intel.com (cherry picked from commit 86c99abb5f1b6fcd69fb268eeb2e34cb7c4f355c) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_display.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/xe_display.c index 74391d9b11ae0..e4db069f0db3f 100644 --- a/drivers/gpu/drm/xe/xe_display.c +++ b/drivers/gpu/drm/xe/xe_display.c @@ -134,8 +134,6 @@ static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) int xe_display_init_nommio(struct xe_device *xe) { - int err; - if (!xe->info.enable_display) return 0; @@ -145,10 +143,6 @@ int xe_display_init_nommio(struct xe_device *xe) /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(xe); - err = intel_power_domains_init(xe); - if (err) - return err; - return drmm_add_action_or_reset(&xe->drm, xe_display_fini_nommio, xe); } -- GitLab From 95c058c8ef1d5d9e39ab2039a5eea4d5b93f4117 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 5 Feb 2024 15:17:14 -0800 Subject: [PATCH 0823/1405] drm/xe: Assume large page size if VMA not yet bound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The calculation to determine max page size of a VMA during a REMAP operations assumes the VMA has been bound. This assumption is not true if the VMA is from an eariler operation in an array of binds. If a VMA has not been bound use the maximum page size which will ensure the previous / next REMAP operations are not incorrectly skipped. Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240205231714.2956225-1-matthew.brost@intel.com (cherry picked from commit 5ad6af5c91e9b942c44b657122270d935db3a813) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ca11f83811155..99f151d73154a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2200,8 +2200,10 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) return SZ_1G; else if (vma->gpuva.flags & XE_VMA_PTE_2M) return SZ_2M; + else if (vma->gpuva.flags & XE_VMA_PTE_4K) + return SZ_4K; - return SZ_4K; + return SZ_1G; /* Uninitialized, used max size */ } static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) -- GitLab From 9e3fc1d65d4e8cf302e289847ab165ad9358fdb2 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 2 Feb 2024 17:14:36 +0000 Subject: [PATCH 0824/1405] drm/xe/vm: don't ignore error when in_kthread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If GUP fails and we are in_kthread, we can have pinned = 0 and ret = 0. If that happens we call sg_alloc_append_table_from_pages() with n_pages = 0, which is not well behaved and can trigger: kernel BUG at include/linux/scatterlist.h:115! depending on if the pages array happens to be zeroed or not. Even if we don't hit that it crashes later when trying to dma_map the returned table. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240202171435.427630-2-matthew.auld@intel.com (cherry picked from commit 8087199cd5951c1eba26003b3e4296dbb2110adf) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 99f151d73154a..a9df894131559 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -114,11 +114,8 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) num_pages - pinned, read_only ? 0 : FOLL_WRITE, &pages[pinned]); - if (ret < 0) { - if (in_kthread) - ret = 0; + if (ret < 0) break; - } pinned += ret; ret = 0; -- GitLab From bf4c27b8267d7848bb81fd41e6aa07aa662f07fb Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 5 Feb 2024 20:50:10 -0800 Subject: [PATCH 0825/1405] drm/xe: Remove TEST_VM_ASYNC_OPS_ERROR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TEST_VM_ASYNC_OPS_ERROR is broken and unused. Remove for now and will pull back in a later time when it is used, fixed, and properly hidden behind a Kconfig option. Also fixup the supported flags value. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240206045010.2981051-1-matthew.brost@intel.com (cherry picked from commit d9890c028d66a9e1ee3cccaa081ab5aedcbfe431) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 28 +--------------------------- drivers/gpu/drm/xe/xe_vm_types.h | 8 -------- 2 files changed, 1 insertion(+), 35 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a9df894131559..865e10d0a06aa 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -37,8 +37,6 @@ #include "generated/xe_wa_oob.h" #include "xe_wa.h" -#define TEST_VM_ASYNC_OPS_ERROR - static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { return vm->gpuvm.r_obj; @@ -2062,7 +2060,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; struct drm_gpuva_ops *ops; struct drm_gpuva_op *__op; - struct xe_vma_op *op; struct drm_gpuvm_bo *vm_bo; int err; @@ -2109,15 +2106,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (IS_ERR(ops)) return ops; -#ifdef TEST_VM_ASYNC_OPS_ERROR - if (operation & FORCE_ASYNC_OP_ERROR) { - op = list_first_entry_or_null(&ops->list, struct xe_vma_op, - base.entry); - if (op) - op->inject_error = true; - } -#endif - drm_gpuva_for_each_op(__op, ops) { struct xe_vma_op *op = gpuva_op_to_vma_op(__op); @@ -2560,13 +2548,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op) lockdep_assert_held_write(&vm->lock); -#ifdef TEST_VM_ASYNC_OPS_ERROR - if (op->inject_error) { - op->inject_error = false; - return -ENOMEM; - } -#endif - switch (op->base.op) { case DRM_GPUVA_OP_MAP: ret = __xe_vma_op_execute(vm, op->map.vma, op); @@ -2726,16 +2707,9 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#ifdef TEST_VM_ASYNC_OPS_ERROR -#define SUPPORTED_FLAGS \ - (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff) -#else #define SUPPORTED_FLAGS \ (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ - 0xffff) -#endif + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1fec66ae2eb2d..5ac9c5bebabc3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -21,9 +21,6 @@ struct xe_bo; struct xe_sync_entry; struct xe_vm; -#define TEST_VM_ASYNC_OPS_ERROR -#define FORCE_ASYNC_OP_ERROR BIT(31) - #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS #define XE_VMA_DESTROYED (DRM_GPUVA_USERBITS << 1) #define XE_VMA_ATOMIC_PTE_BIT (DRM_GPUVA_USERBITS << 2) @@ -360,11 +357,6 @@ struct xe_vma_op { /** @flags: operation flags */ enum xe_vma_op_flags flags; -#ifdef TEST_VM_ASYNC_OPS_ERROR - /** @inject_error: inject error to test async op error handling */ - bool inject_error; -#endif - union { /** @map: VMA map operation specific data */ struct xe_vma_op_map map; -- GitLab From ab0beafd52b98dfb8b8244b2c6794efbc87478db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 2 Feb 2024 10:09:34 +0100 Subject: [PATCH 0826/1405] netfilter: nft_set_pipapo: remove static in nft_pipapo_get() This has slipped through when reducing memory footprint for set elements, remove it. Fixes: 9dad402b89e8 ("netfilter: nf_tables: expose opaque set element as struct nft_elem_priv") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index efd523496be45..f24ecdaa1c1ed 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -603,7 +603,7 @@ static struct nft_elem_priv * nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { - static struct nft_pipapo_elem *e; + struct nft_pipapo_elem *e; e = pipapo_get(net, set, (const u8 *)elem->key.val.data, nft_genmask_cur(net)); -- GitLab From 2526dffc6d65cffa32b88556bd68e4e72e889a55 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Feb 2024 11:22:29 +0100 Subject: [PATCH 0827/1405] gpio: remove GPIO device from the list unconditionally in error path Since commit 48e1b4d369cf ("gpiolib: remove the GPIO device from the list when it's unregistered") we remove the GPIO device entry from the global list (used to order devices by their GPIO ranges) when unregistering the chip, not when releasing the device. It will not happen when the last reference is put anymore. This means, we need to remove it in error path in gpiochip_add_data_with_key() unconditionally, without checking if the device's .release() callback is set. Fixes: 48e1b4d369cf ("gpiolib: remove the GPIO device from the list when it's unregistered") Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 44c8f5743a241..8b3a0f45b5745 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1005,15 +1005,15 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, err_free_gpiochip_mask: gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); +err_remove_from_list: + spin_lock_irqsave(&gpio_lock, flags); + list_del(&gdev->list); + spin_unlock_irqrestore(&gpio_lock, flags); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ gpio_device_put(gdev); goto err_print_message; } -err_remove_from_list: - spin_lock_irqsave(&gpio_lock, flags); - list_del(&gdev->list); - spin_unlock_irqrestore(&gpio_lock, flags); err_free_label: kfree_const(gdev->label); err_free_descs: -- GitLab From 9def04e759caa5a3d741891037ae99f81e2fff01 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:27 +0530 Subject: [PATCH 0828/1405] net: ethernet: ti: cpsw_new: enable mac_managed_pm to fix mdio The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw_new resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/cpsw_new.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 498c50c6d1a70..087dcb67505a2 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->slave_num); return; } + + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From bc4ce46b1e3d1da4309405cd4afc7c0fcddd0b90 Mon Sep 17 00:00:00 2001 From: Sinthu Raja Date: Tue, 6 Feb 2024 06:29:28 +0530 Subject: [PATCH 0829/1405] net: ethernet: ti: cpsw: enable mac_managed_pm to fix mdio The below commit introduced a WARN when phy state is not in the states: PHY_HALTED, PHY_READY and PHY_UP. commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") When cpsw resumes, there have port in PHY_NOLINK state, so the below warning comes out. Set mac_managed_pm be true to tell mdio that the phy resume/suspend is managed by the mac, to fix the following warning: WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144 CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1 Hardware name: Generic AM33XX (Flattened Device Tree) unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x24/0x2c dump_stack_lvl from __warn+0x84/0x15c __warn from warn_slowpath_fmt+0x1a8/0x1c8 warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144 mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140 dpm_run_callback from device_resume+0xb8/0x2b8 device_resume from dpm_resume+0x144/0x314 dpm_resume from dpm_resume_end+0x14/0x20 dpm_resume_end from suspend_devices_and_enter+0xd0/0x924 suspend_devices_and_enter from pm_suspend+0x2e0/0x33c pm_suspend from state_store+0x74/0xd0 state_store from kernfs_fop_write_iter+0x104/0x1ec kernfs_fop_write_iter from vfs_write+0x1b8/0x358 vfs_write from ksys_write+0x78/0xf8 ksys_write from ret_fast_syscall+0x0/0x54 Exception stack(0xe094dfa8 to 0xe094dff0) dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001 dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000 dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66 Cc: # v6.0+ Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") Fixes: fba863b81604 ("net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM") Signed-off-by: Sinthu Raja Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ea85c6dd54846..c0a5abd8d9a8e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -631,6 +631,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } } + phy->mac_managed_pm = true; + slave->phy = phy; phy_attached_info(slave->phy); -- GitLab From db010ff6700f24f96e91ed56ca29cb69335008ef Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Tue, 6 Feb 2024 08:10:00 +0530 Subject: [PATCH 0830/1405] octeontx2-af: Initialize maps. kmalloc_array() without __GFP_ZERO flag does not initialize memory to zero. This causes issues. Use kcalloc() for maps and bitmap_zalloc() for bitmaps. Fixes: dd7842878633 ("octeontx2-af: Add new devlink param to configure maximum usable NIX block LFs") Signed-off-by: Ratheesh Kannoth Reviewed-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206024000.1070260-1-rkannoth@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 167145bdcb75d..8cfd74ad991cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1850,8 +1850,8 @@ void npc_mcam_rsrcs_deinit(struct rvu *rvu) { struct npc_mcam *mcam = &rvu->hw->mcam; - kfree(mcam->bmap); - kfree(mcam->bmap_reverse); + bitmap_free(mcam->bmap); + bitmap_free(mcam->bmap_reverse); kfree(mcam->entry2pfvf_map); kfree(mcam->cntr2pfvf_map); kfree(mcam->entry2cntr_map); @@ -1904,21 +1904,20 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) mcam->pf_offset = mcam->nixlf_offset + nixlf_count; /* Allocate bitmaps for managing MCAM entries */ - mcam->bmap = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries), - sizeof(long), GFP_KERNEL); + mcam->bmap = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL); if (!mcam->bmap) return -ENOMEM; - mcam->bmap_reverse = kmalloc_array(BITS_TO_LONGS(mcam->bmap_entries), - sizeof(long), GFP_KERNEL); + mcam->bmap_reverse = bitmap_zalloc(mcam->bmap_entries, GFP_KERNEL); if (!mcam->bmap_reverse) goto free_bmap; mcam->bmap_fcnt = mcam->bmap_entries; /* Alloc memory for saving entry to RVU PFFUNC allocation mapping */ - mcam->entry2pfvf_map = kmalloc_array(mcam->bmap_entries, - sizeof(u16), GFP_KERNEL); + mcam->entry2pfvf_map = kcalloc(mcam->bmap_entries, sizeof(u16), + GFP_KERNEL); + if (!mcam->entry2pfvf_map) goto free_bmap_reverse; @@ -1941,21 +1940,21 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) if (err) goto free_entry_map; - mcam->cntr2pfvf_map = kmalloc_array(mcam->counters.max, - sizeof(u16), GFP_KERNEL); + mcam->cntr2pfvf_map = kcalloc(mcam->counters.max, sizeof(u16), + GFP_KERNEL); if (!mcam->cntr2pfvf_map) goto free_cntr_bmap; /* Alloc memory for MCAM entry to counter mapping and for tracking * counter's reference count. */ - mcam->entry2cntr_map = kmalloc_array(mcam->bmap_entries, - sizeof(u16), GFP_KERNEL); + mcam->entry2cntr_map = kcalloc(mcam->bmap_entries, sizeof(u16), + GFP_KERNEL); if (!mcam->entry2cntr_map) goto free_cntr_map; - mcam->cntr_refcnt = kmalloc_array(mcam->counters.max, - sizeof(u16), GFP_KERNEL); + mcam->cntr_refcnt = kcalloc(mcam->counters.max, sizeof(u16), + GFP_KERNEL); if (!mcam->cntr_refcnt) goto free_entry_cntr_map; @@ -1988,9 +1987,9 @@ int npc_mcam_rsrcs_init(struct rvu *rvu, int blkaddr) free_entry_map: kfree(mcam->entry2pfvf_map); free_bmap_reverse: - kfree(mcam->bmap_reverse); + bitmap_free(mcam->bmap_reverse); free_bmap: - kfree(mcam->bmap); + bitmap_free(mcam->bmap); return -ENOMEM; } -- GitLab From 27c5a095e2518975e20a10102908ae8231699879 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 4 Feb 2024 16:26:42 +0100 Subject: [PATCH 0831/1405] netfilter: ipset: Missing gc cancellations fixed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch fdb8e12cc2cc ("netfilter: ipset: fix performance regression in swap operation") missed to add the calls to gc cancellations at the error path of create operations and at module unload. Also, because the half of the destroy operations now executed by a function registered by call_rcu(), neither NFNL_SUBSYS_IPSET mutex or rcu read lock is held and therefore the checking of them results false warnings. Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation") Reported-by: syzbot+52bbc0ad036f6f0d4a25@syzkaller.appspotmail.com Reported-by: Brad Spengler Reported-by: Стас Ничипорович Tested-by: Brad Spengler Tested-by: Стас Ничипорович Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 ++ net/netfilter/ipset/ip_set_hash_gen.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bcaad9c009fe0..3184cc6be4c9d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -2378,6 +2379,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1136510521a80..cf3ce72c3de64 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -432,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -452,7 +452,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); -- GitLab From 2fe8a236436fe40d8d26a1af8d150fc80f04ee1a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Tue, 6 Feb 2024 09:58:49 +0100 Subject: [PATCH 0832/1405] s390/qeth: Fix potential loss of L3-IP@ in case of network issues Symptom: In case of a bad cable connection (e.g. dirty optics) a fast sequence of network DOWN-UP-DOWN-UP could happen. UP triggers recovery of the qeth interface. In case of a second DOWN while recovery is still ongoing, it can happen that the IP@ of a Layer3 qeth interface is lost and will not be recovered by the second UP. Problem: When registration of IP addresses with Layer 3 qeth devices fails, (e.g. because of bad address format) the respective IP address is deleted from its hash-table in the driver. If registration fails because of a ENETDOWN condition, the address should stay in the hashtable, so a subsequent recovery can restore it. 3caa4af834df ("qeth: keep ip-address after LAN_OFFLINE failure") fixes this for registration failures during normal operation, but not during recovery. Solution: Keep L3-IP address in case of ENETDOWN in qeth_l3_recover_ip(). For consistency with qeth_l3_add_ip() we also keep it in case of EADDRINUSE, i.e. for some reason the card already/still has this address registered. Fixes: 4a71df50047f ("qeth: new qeth device driver") Cc: stable@vger.kernel.org Signed-off-by: Alexandra Winter Link: https://lore.kernel.org/r/20240206085849.2902775-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/s390/net/qeth_l3_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index b92a32b4b1141..04c64ce0a1ca1 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover) if (!recover) { hash_del(&addr->hnode); kfree(addr); - continue; + } else { + /* prepare for recovery */ + addr->disp_flag = QETH_DISP_ADDR_ADD; } - addr->disp_flag = QETH_DISP_ADDR_ADD; } mutex_unlock(&card->ip_lock); @@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (addr->disp_flag == QETH_DISP_ADDR_ADD) { rc = qeth_l3_register_addr_entry(card, addr); - if (!rc) { + if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) { + /* keep it in the records */ addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; } else { + /* bad address */ hash_del(&addr->hnode); kfree(addr); } -- GitLab From fa173a1b4e3fd1ab5451cbc57de6fc624c824b0a Mon Sep 17 00:00:00 2001 From: Felix Huettner Date: Mon, 5 Feb 2024 09:59:59 +0000 Subject: [PATCH 0833/1405] netfilter: ctnetlink: fix filtering for zone 0 previously filtering for the default zone would actually skip the zone filter and flush all zones. Fixes: eff3c558bb7e ("netfilter: ctnetlink: support filtering by zone") Reported-by: Ilya Maximets Closes: https://lore.kernel.org/netdev/2032238f-31ac-4106-8f22-522e76df5a12@ovn.org/ Signed-off-by: Felix Huettner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 12 ++++-- .../netfilter/conntrack_dump_flush.c | 43 ++++++++++++++++++- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0c22a02c2035c..3b846cbdc050d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 { struct ctnetlink_filter { u8 family; + bool zone_filter; u_int32_t orig_flags; u_int32_t reply_flags; @@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) if (err) goto err_filter; - err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); - if (err < 0) - goto err_filter; + if (cda[CTA_ZONE]) { + err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); + if (err < 0) + goto err_filter; + filter->zone_filter = true; + } if (!cda[CTA_FILTER]) return filter; @@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; - if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID && + if (filter->zone_filter && !nf_ct_zone_equal_any(ct, &filter->zone)) goto ignore_entry; diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c index f18c6db13bbff..b11ea8ee67194 100644 --- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c @@ -13,7 +13,7 @@ #include "../kselftest_harness.h" #define TEST_ZONE_ID 123 -#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2) +#define NF_CT_DEFAULT_ZONE_ID 0 static int reply_counter; @@ -336,6 +336,9 @@ FIXTURE_SETUP(conntrack_dump_flush) ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, TEST_ZONE_ID + 2); EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); src = (struct in6_addr) {{ .__u6_addr32 = { @@ -395,6 +398,26 @@ FIXTURE_SETUP(conntrack_dump_flush) TEST_ZONE_ID + 2); EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x07000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x08000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); EXPECT_GE(ret, 2); if (ret > 2) @@ -425,6 +448,24 @@ TEST_F(conntrack_dump_flush, test_flush_by_zone) EXPECT_EQ(ret, 2); ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone_default) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); } TEST_HARNESS_MAIN -- GitLab From 38ed1c7062ada30d7c11e7a7acc749bf27aa14aa Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 5 Feb 2024 14:59:24 +0100 Subject: [PATCH 0834/1405] netfilter: nft_ct: reject direction for ct id Direction attribute is ignored, reject it in case this ever needs to be supported Fixes: 3087c3f7c23b ("netfilter: nft_ct: Add ct id support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index aac98a3c966e9..bfd3e5a14dab6 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: -- GitLab From 7395dfacfff65e9938ac0889dafa1ab01e987d15 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 6 Feb 2024 00:11:40 +0100 Subject: [PATCH 0835/1405] netfilter: nf_tables: use timestamp to check for set element timeout Add a timestamp field at the beginning of the transaction, store it in the nftables per-netns area. Update set backend .insert, .deactivate and sync gc path to use the timestamp, this avoids that an element expires while control plane transaction is still unfinished. .lookup and .update, which are used from packet path, still use the current time to check if the element has expired. And .get path and dump also since this runs lockless under rcu read size lock. Then, there is async gc which also needs to check the current time since it runs asynchronously from a workqueue. Fixes: c3e1b005ed1c ("netfilter: nf_tables: add set element timeout support") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 16 ++++++++++++++-- net/netfilter/nf_tables_api.c | 4 +++- net/netfilter/nft_set_hash.c | 8 +++++++- net/netfilter/nft_set_pipapo.c | 18 +++++++++++------- net/netfilter/nft_set_rbtree.c | 11 +++++++---- 5 files changed, 42 insertions(+), 15 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 001226c346215..510244cc0f8f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -808,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS); } -static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, + u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return __nft_set_elem_expired(ext, get_jiffies_64()); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, @@ -1779,6 +1785,7 @@ struct nftables_pernet { struct list_head notify_list; struct mutex commit_mutex; u64 table_handle; + u64 tstamp; unsigned int base_seq; unsigned int gc_seq; u8 validate_state; @@ -1791,6 +1798,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net) return net_generic(net, nf_tables_net_id); } +static inline u64 nft_net_tstamp(const struct net *net) +{ + return nft_pernet(net)->tstamp; +} + #define __NFT_REDUCE_READONLY 1UL #define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fc016befb46ff..f8e3f70c35bd5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9827,6 +9827,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc, struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { struct nft_set_elem_catchall *catchall, *next; + u64 tstamp = nft_net_tstamp(gc->net); const struct nft_set *set = gc->set; struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; @@ -9836,7 +9837,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_expired(ext)) + if (!__nft_set_elem_expired(ext, tstamp)) continue; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); @@ -10622,6 +10623,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid) bool genid_ok; mutex_lock(&nft_net->commit_mutex); + nft_net->tstamp = get_jiffies_64(); genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6c2061bfdae6c..6968a3b342367 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; + u64 tstamp; }; static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) @@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, return 1; if (nft_set_elem_is_dead(&he->ext)) return 1; - if (nft_set_elem_expired(&he->ext)) + if (__nft_set_elem_expired(&he->ext, x->tstamp)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; @@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = elem->key.val.data, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, .genmask = NFT_GENMASK_ANY, .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; struct nft_rhash_elem *prev; @@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; rcu_read_lock(); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f24ecdaa1c1ed..b17c182034166 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -504,6 +504,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, * @set: nftables API set representation * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask + * @tstamp: timestamp to check for expired elements * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -513,7 +514,8 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, - const u8 *data, u8 genmask) + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -566,7 +568,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext)) + if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) @@ -606,7 +608,7 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set, struct nft_pipapo_elem *e; e = pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + nft_genmask_cur(net), get_jiffies_64()); if (IS_ERR(e)) return ERR_CAST(e); @@ -1173,6 +1175,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); struct nft_pipapo_elem *e, *dup; + u64 tstamp = nft_net_tstamp(net); struct nft_pipapo_field *f; const u8 *start_p, *end_p; int i, bsize_max, err = 0; @@ -1182,7 +1185,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, start, genmask); + dup = pipapo_get(net, set, start, genmask, tstamp); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1204,7 +1207,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net)); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); } if (PTR_ERR(dup) != -ENOENT) { @@ -1560,6 +1563,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1594,7 +1598,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) /* synchronous gc never fails, there is no need to set on * NFT_SET_ELEM_DEAD_BIT. */ - if (nft_set_elem_expired(&e->ext)) { + if (__nft_set_elem_expired(&e->ext, tstamp)) { priv->dirty = true; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); @@ -1769,7 +1773,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); if (IS_ERR(e)) return NULL; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index baa3fea4fe65c..5fd74f993988f 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d; /* Descend the tree to search for an existing element greater than the @@ -360,7 +361,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, /* perform garbage collection to avoid bogus overlap reports * but skip new elements in this transaction. */ - if (nft_set_elem_expired(&rbe->ext) && + if (__nft_set_elem_expired(&rbe->ext, tstamp) && nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; @@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d; while (parent != NULL) { @@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; - } else if (nft_set_elem_expired(&rbe->ext)) { + } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; @@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; + struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); struct rb_node *node, *next; struct nft_trans_gc *gc; - struct net *net; set = nft_set_container_of(priv); net = read_pnet(&set->net); @@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set) rbe_end = rbe; continue; } - if (!nft_set_elem_expired(&rbe->ext)) + if (!__nft_set_elem_expired(&rbe->ext, tstamp)) continue; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); -- GitLab From f82777e8ce6c039cdcacbcf1eb8619b99a20c06d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 6 Feb 2024 17:54:18 +0100 Subject: [PATCH 0836/1405] netfilter: nfnetlink_queue: un-break NF_REPEAT Only override userspace verdict if the ct hook returns something other than ACCEPT. Else, this replaces NF_REPEAT (run all hooks again) with NF_ACCEPT (move to next hook). Fixes: 6291b3a67ad5 ("netfilter: conntrack: convert nf_conntrack_update to netfilter verdicts") Reported-by: l.6diay@passmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 171d1f52d3dd0..5cf38fc0a366a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT || verdict == NF_REPEAT || verdict == NF_STOP) { + unsigned int ct_verdict = verdict; + rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); if (ct_hook) - verdict = ct_hook->update(entry->state.net, entry->skb); + ct_verdict = ct_hook->update(entry->state.net, entry->skb); rcu_read_unlock(); - switch (verdict & NF_VERDICT_MASK) { + switch (ct_verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + /* follow userspace verdict, could be REPEAT */ + break; case NF_STOLEN: nf_queue_entry_free(entry); return; + default: + verdict = ct_verdict & NF_VERDICT_MASK; + break; } - } nf_reinject(entry, verdict); } -- GitLab From 60c0c230c6f046da536d3df8b39a20b9a9fd6af0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Feb 2024 18:49:51 +0100 Subject: [PATCH 0837/1405] netfilter: nft_set_rbtree: skip end interval element from gc rbtree lazy gc on insert might collect an end interval element that has been just added in this transactions, skip end interval elements that are not yet active. Fixes: f718863aca46 ("netfilter: nft_set_rbtree: fix overlap expiration walk") Cc: stable@vger.kernel.org Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5fd74f993988f..9944fe479e536 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -365,7 +365,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); -- GitLab From 76313d1a4aa9e30d5b43dee5efd8bcd4d8250006 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2024 21:52:46 +0100 Subject: [PATCH 0838/1405] netfilter: nft_set_pipapo: store index in scratch maps Pipapo needs a scratchpad area to keep state during matching. This state can be large and thus cannot reside on stack. Each set preallocates percpu areas for this. On each match stage, one scratchpad half starts with all-zero and the other is inited to all-ones. At the end of each stage, the half that starts with all-ones is always zero. Before next field is tested, pointers to the two halves are swapped, i.e. resmap pointer turns into fill pointer and vice versa. After the last field has been processed, pipapo stashes the index toggle in a percpu variable, with assumption that next packet will start with the all-zero half and sets all bits in the other to 1. This isn't reliable. There can be multiple sets and we can't be sure that the upper and lower half of all set scratch map is always in sync (lookups can be conditional), so one set might have swapped, but other might not have been queried. Thus we need to keep the index per-set-and-cpu, just like the scratchpad. Note that this bug fix is incomplete, there is a related issue. avx2 and normal implementation might use slightly different areas of the map array space due to the avx2 alignment requirements, so m->scratch (generic/fallback implementation) and ->scratch_aligned (avx) may partially overlap. scratch and scratch_aligned are not distinct objects, the latter is just the aligned address of the former. After this change, write to scratch_align->map_index may write to scratch->map, so this issue becomes more prominent, we can set to 1 a bit in the supposedly-all-zero area of scratch->map[]. A followup patch will remove the scratch_aligned and makes generic and avx code use the same (aligned) area. Its done in a separate change to ease review. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 41 ++++++++++++++++++----------- net/netfilter/nft_set_pipapo.h | 14 ++++++++-- net/netfilter/nft_set_pipapo_avx2.c | 15 +++++------ 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index b17c182034166..54d0bac140a3b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -1123,12 +1123,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1147,7 +1147,16 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, *per_cpu_ptr(clone->scratch, i) = scratch; #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; #endif } @@ -2136,7 +2145,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2145,7 +2154,7 @@ static int nft_pipapo_init(const struct nft_set *set, *per_cpu_ptr(m->scratch, i) = NULL; #ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); + m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch_aligned) { err = -ENOMEM; goto out_free; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 1040223da5fa3..d3bc1551694f3 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -130,6 +130,16 @@ struct nft_pipapo_field { union nft_pipapo_map_bucket *mt; }; +/** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working bitmap index, toggled between field matches + * @map: store partial matching results during lookup + */ +struct nft_pipapo_scratch { + u8 map_index; + unsigned long map[]; +}; + /** * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set @@ -142,9 +152,9 @@ struct nft_pipapo_field { struct nft_pipapo_match { int field_count; #ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; + struct nft_pipapo_scratch * __percpu *scratch_aligned; #endif - unsigned long * __percpu *scratch; + struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[] __counted_by(field_count); diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d30ad..78213c73af2e2 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -71,9 +71,6 @@ #define NFT_PIPAPO_AVX2_ZERO(reg) \ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); - /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * @@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); - unsigned long *res, *fill, *scratch; + struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; struct nft_pipapo_match *m; struct nft_pipapo_field *f; + unsigned long *res, *fill; bool map_index; int i, ret = 0; @@ -1146,10 +1144,11 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, kernel_fpu_end(); return false; } - map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); - res = scratch + (map_index ? m->bsize_max : 0); - fill = scratch + (map_index ? 0 : m->bsize_max); + map_index = scratch->map_index; + + res = scratch->map + (map_index ? m->bsize_max : 0); + fill = scratch->map + (map_index ? 0 : m->bsize_max); /* Starting map doesn't need to be set for this implementation */ @@ -1221,7 +1220,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, out: if (i % 2) - raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + scratch->map_index = !map_index; kernel_fpu_end(); return ret >= 0; -- GitLab From 47b1c03c3c1a119435480a1e73f27197dc59131d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2024 21:52:47 +0100 Subject: [PATCH 0839/1405] netfilter: nft_set_pipapo: add helper to release pcpu scratch area After next patch simple kfree() is not enough anymore, so add a helper for it. Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 54d0bac140a3b..5094d4c439c3b 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1110,6 +1110,24 @@ static void pipapo_map(struct nft_pipapo_match *m, f->mt[map[i].to + j].e = e; } +/** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + kfree(mem); +} + /** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions @@ -1142,7 +1160,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); + pipapo_free_scratch(clone, i); *per_cpu_ptr(clone->scratch, i) = scratch; @@ -1369,7 +1387,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); + pipapo_free_scratch(new, i); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif @@ -1653,7 +1671,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2253,7 +2271,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, free_percpu(m->scratch_aligned); #endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2270,7 +2288,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, free_percpu(priv->clone->scratch_aligned); #endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); -- GitLab From 5a8cdf6fd860ac5e6d08d72edbcecee049a7fec4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 8 Feb 2024 10:31:29 +0100 Subject: [PATCH 0840/1405] netfilter: nft_set_pipapo: remove scratch_aligned pointer use ->scratch for both avx2 and the generic implementation. After previous change the scratch->map member is always aligned properly for AVX2, so we can just use scratch->map in AVX2 too. The alignoff delta is stored in the scratchpad so we can reconstruct the correct address to free the area again. Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 41 +++++------------------------ net/netfilter/nft_set_pipapo.h | 6 ++--- net/netfilter/nft_set_pipapo_avx2.c | 2 +- 3 files changed, 10 insertions(+), 39 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 5094d4c439c3b..aa1d9e93a9a04 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1125,6 +1125,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c return; mem = s; + mem -= s->align_off; kfree(mem); } @@ -1144,6 +1145,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, struct nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN void *scratch_aligned; + u32 align_off; #endif scratch = kzalloc_node(struct_size(scratch, map, bsize_max * 2) + @@ -1162,8 +1164,6 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, pipapo_free_scratch(clone, i); - *per_cpu_ptr(clone->scratch, i) = scratch; - #ifdef NFT_PIPAPO_ALIGN /* Align &scratch->map (not the struct itself): the extra * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() @@ -1175,8 +1175,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1331,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1388,9 +1387,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) out_scratch_realloc: for_each_possible_cpu(i) pipapo_free_scratch(new, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif out_scratch: free_percpu(new->scratch); kfree(new); @@ -1673,11 +1669,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) for_each_possible_cpu(i) pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -2171,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2211,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2267,9 +2246,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) pipapo_free_scratch(m, cpu); free_percpu(m->scratch); @@ -2284,9 +2260,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->dirty) nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index d3bc1551694f3..f59a0cd811051 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -133,10 +133,12 @@ struct nft_pipapo_field { /** * struct nft_pipapo_scratch - percpu data used for lookup and matching * @map_index: Current working bitmap index, toggled between field matches + * @align_off: Offset to get the originally allocated address * @map: store partial matching results during lookup */ struct nft_pipapo_scratch { u8 map_index; + u32 align_off; unsigned long map[]; }; @@ -144,16 +146,12 @@ struct nft_pipapo_scratch { * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @rcu Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { int field_count; -#ifdef NFT_PIPAPO_ALIGN - struct nft_pipapo_scratch * __percpu *scratch_aligned; -#endif struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 78213c73af2e2..90e275bb3e5d7 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1139,7 +1139,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch_aligned); + scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); return false; -- GitLab From c98d8836b817d11fdff4ca7749cbbe04ff7f0c64 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 31 Jan 2024 16:49:10 +0100 Subject: [PATCH 0841/1405] wifi: mac80211: reload info pointer in ieee80211_tx_dequeue() This pointer can change here since the SKB can change, so we actually later open-coded IEEE80211_SKB_CB() again. Reload the pointer where needed, so the monitor-mode case using it gets fixed, and then use info-> later as well. Cc: stable@vger.kernel.org Fixes: 531682159092 ("mac80211: fix VLAN handling with TXQs") Link: https://msgid.link/20240131164910.b54c28d583bc.I29450cec84ea6773cff5d9c16ff92b836c331471@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e448ab3384489..6fbb15b65902c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -3927,6 +3927,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3971,7 +3972,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { -- GitLab From 2e57b77583ca34fdb6e14f253172636c52f81cf2 Mon Sep 17 00:00:00 2001 From: Daniel Gabay Date: Mon, 5 Feb 2024 00:06:03 +0200 Subject: [PATCH 0842/1405] wifi: iwlwifi: mvm: use correct address 3 in A-MSDU As described in IEEE sta 802.11-2020, table 9-30 (Address field contents), A-MSDU address 3 should contain the BSSID address. In TX_CMD we copy the MAC header from skb, and skb address 3 holds the destination address, but it may not be identical to the BSSID. Using the wrong destination address appears to work with (most) receivers without MLO, but in MLO some devices are checking for it carefully, perhaps as a consequence of link to MLD address translation. Replace address 3 in the TX_CMD MAC header with the correct address while retaining the skb address 3 unchanged. This ensures that skb address 3 will be utilized later for constructing the A-MSDU subframes. Note that we fill in the MLD address, but the firmware will do the necessary translation to link address after encryption. Signed-off-by: Daniel Gabay Signed-off-by: Miri Korenblit Link: https://msgid.link/20240204235836.4583a1bf9188.I3f8e7892bdf8f86b4daa28453771a8c9817b2416@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 69 ++++++++++++++++++--- 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index db986bfc4dc3f..461f26d9214e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -520,13 +520,24 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, } } +static void iwl_mvm_copy_hdr(void *cmd, const void *hdr, int hdrlen, + const u8 *addr3_override) +{ + struct ieee80211_hdr *out_hdr = cmd; + + memcpy(cmd, hdr, hdrlen); + if (addr3_override) + memcpy(out_hdr->addr3, addr3_override, ETH_ALEN); +} + /* * Allocates and sets the Tx cmd the driver data pointers in the skb */ static struct iwl_device_tx_cmd * iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_tx_info *info, int hdrlen, - struct ieee80211_sta *sta, u8 sta_id) + struct ieee80211_sta *sta, u8 sta_id, + const u8 *addr3_override) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct iwl_device_tx_cmd *dev_cmd; @@ -584,7 +595,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, cmd->len = cpu_to_le16((u16)skb->len); /* Copy MAC header from skb into command buffer */ - memcpy(cmd->hdr, hdr, hdrlen); + iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override); cmd->flags = cpu_to_le16(flags); cmd->rate_n_flags = cpu_to_le32(rate_n_flags); @@ -599,7 +610,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, cmd->len = cpu_to_le16((u16)skb->len); /* Copy MAC header from skb into command buffer */ - memcpy(cmd->hdr, hdr, hdrlen); + iwl_mvm_copy_hdr(cmd->hdr, hdr, hdrlen, addr3_override); cmd->flags = cpu_to_le32(flags); cmd->rate_n_flags = cpu_to_le32(rate_n_flags); @@ -617,7 +628,7 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_mvm_set_tx_cmd_rate(mvm, tx_cmd, info, sta, hdr->frame_control); /* Copy MAC header from skb into command buffer */ - memcpy(tx_cmd->hdr, hdr, hdrlen); + iwl_mvm_copy_hdr(tx_cmd->hdr, hdr, hdrlen, addr3_override); out: return dev_cmd; @@ -820,7 +831,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) IWL_DEBUG_TX(mvm, "station Id %d, queue=%d\n", sta_id, queue); - dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id); + dev_cmd = iwl_mvm_set_tx_params(mvm, skb, &info, hdrlen, NULL, sta_id, + NULL); if (!dev_cmd) return -1; @@ -1140,7 +1152,8 @@ static int iwl_mvm_tx_pkt_queued(struct iwl_mvm *mvm, */ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, struct ieee80211_tx_info *info, - struct ieee80211_sta *sta) + struct ieee80211_sta *sta, + const u8 *addr3_override) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct iwl_mvm_sta *mvmsta; @@ -1172,7 +1185,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_mvm_probe_resp_set_noa(mvm, skb); dev_cmd = iwl_mvm_set_tx_params(mvm, skb, info, hdrlen, - sta, mvmsta->deflink.sta_id); + sta, mvmsta->deflink.sta_id, + addr3_override); if (!dev_cmd) goto drop; @@ -1294,9 +1308,11 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct ieee80211_tx_info info; struct sk_buff_head mpdus_skbs; + struct ieee80211_vif *vif; unsigned int payload_len; int ret; struct sk_buff *orig_skb = skb; + const u8 *addr3; if (WARN_ON_ONCE(!mvmsta)) return -1; @@ -1307,26 +1323,59 @@ int iwl_mvm_tx_skb_sta(struct iwl_mvm *mvm, struct sk_buff *skb, memcpy(&info, skb->cb, sizeof(info)); if (!skb_is_gso(skb)) - return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL); payload_len = skb_tail_pointer(skb) - skb_transport_header(skb) - tcp_hdrlen(skb) + skb->data_len; if (payload_len <= skb_shinfo(skb)->gso_size) - return iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + return iwl_mvm_tx_mpdu(mvm, skb, &info, sta, NULL); __skb_queue_head_init(&mpdus_skbs); + vif = info.control.vif; + if (!vif) + return -1; + ret = iwl_mvm_tx_tso(mvm, skb, &info, sta, &mpdus_skbs); if (ret) return ret; WARN_ON(skb_queue_empty(&mpdus_skbs)); + /* + * As described in IEEE sta 802.11-2020, table 9-30 (Address + * field contents), A-MSDU address 3 should contain the BSSID + * address. + * Pass address 3 down to iwl_mvm_tx_mpdu() and further to set it + * in the command header. We need to preserve the original + * address 3 in the skb header to correctly create all the + * A-MSDU subframe headers from it. + */ + switch (vif->type) { + case NL80211_IFTYPE_STATION: + addr3 = vif->cfg.ap_addr; + break; + case NL80211_IFTYPE_AP: + addr3 = vif->addr; + break; + default: + addr3 = NULL; + break; + } + while (!skb_queue_empty(&mpdus_skbs)) { + struct ieee80211_hdr *hdr; + bool amsdu; + skb = __skb_dequeue(&mpdus_skbs); + hdr = (void *)skb->data; + amsdu = ieee80211_is_data_qos(hdr->frame_control) && + (*ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_A_MSDU_PRESENT); - ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta); + ret = iwl_mvm_tx_mpdu(mvm, skb, &info, sta, + amsdu ? addr3 : NULL); if (ret) { /* Free skbs created as part of TSO logic that have not yet been dequeued */ __skb_queue_purge(&mpdus_skbs); -- GitLab From 3012477cd510044d346c5e0465ead4732aef8349 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 5 Feb 2024 00:06:16 +0200 Subject: [PATCH 0843/1405] wifi: iwlwifi: clear link_id in time_event Before sending a SESSION PROTECTION cmd the driver checks if the link_id indicated in the time event (and for which the cmd will be sent) is valid and exists. Clear the te_data::link_id when FW notifies that a session protection ended, so the check will actually fail when it should. Fixes: 135065837310 ("wifi: iwlwifi: support link_id in SESSION_PROTECTION cmd") Signed-off-by: Miri Korenblit Link: https://msgid.link/20240204235836.c64a6b3606c2.I35cdc08e8a3be282563163690f8ca3edb51a3854@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 218fdf1ed5304..2e653a417d626 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2023 Intel Corporation + * Copyright (C) 2012-2014, 2018-2024 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH */ @@ -972,6 +972,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, if (!le32_to_cpu(notif->status) || !le32_to_cpu(notif->start)) { /* End TE, notify mac80211 */ mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID; + mvmvif->time_event_data.link_id = -1; iwl_mvm_p2p_roc_finished(mvm); ieee80211_remain_on_channel_expired(mvm->hw); } else if (le32_to_cpu(notif->start)) { -- GitLab From c6ebb5b67641994de8bc486b33457fe0b681d6fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:06 +0300 Subject: [PATCH 0844/1405] wifi: iwlwifi: Fix some error codes This saves the error as PTR_ERR(wifi_pkg). The problem is that "wifi_pkg" is a valid pointer, not an error pointer. Set the error code to -EINVAL instead. Fixes: 2a8084147bff ("iwlwifi: acpi: support reading and storing WRDS revision 1 and 2") Signed-off-by: Dan Carpenter Link: https://msgid.link/9620bb77-2d7c-4d76-b255-ad824ebf8e35@moroto.mountain Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index b96f30d11644e..d73d561709d30 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -618,7 +618,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -634,7 +634,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -650,7 +650,7 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -707,7 +707,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 2) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -723,7 +723,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 1) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } @@ -739,7 +739,7 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt) &tbl_rev); if (!IS_ERR(wifi_pkg)) { if (tbl_rev != 0) { - ret = PTR_ERR(wifi_pkg); + ret = -EINVAL; goto out_free; } -- GitLab From 65c6ee90455053cfd3067c17aaa4a42b0c766543 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:17:31 +0300 Subject: [PATCH 0845/1405] wifi: iwlwifi: uninitialized variable in iwl_acpi_get_ppag_table() This is an error path and Smatch complains that "tbl_rev" is uninitialized on this path. All the other functions follow this same patter where they set the error code and goto out_free so that's probably what was intended here as well. Fixes: e8e10a37c51c ("iwlwifi: acpi: move ppag code from mvm to fw/acpi") Signed-off-by: Dan Carpenter Link: https://msgid.link/09900c01-6540-4a32-9451-563da0029cb6@moroto.mountain Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index d73d561709d30..dcc4810cb3247 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -1116,6 +1116,9 @@ int iwl_acpi_get_ppag_table(struct iwl_fw_runtime *fwrt) goto read_table; } + ret = PTR_ERR(wifi_pkg); + goto out_free; + read_table: fwrt->ppag_ver = tbl_rev; flags = &wifi_pkg->package.elements[1]; -- GitLab From b7198383ef2debe748118996f627452281cf27d7 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 6 Feb 2024 18:02:04 +0200 Subject: [PATCH 0846/1405] wifi: iwlwifi: mvm: fix a crash when we run out of stations A DoS tool that injects loads of authentication frames made our AP crash. The iwl_mvm_is_dup() function couldn't find the per-queue dup_data which was not allocated. The root cause for that is that we ran out of stations in the firmware and we didn't really add the station to the firmware, yet we didn't return an error to mac80211. Mac80211 was thinking that we have the station and because of that, sta_info::uploaded was set to 1. This allowed ieee80211_find_sta_by_ifaddr() to return a valid station object, but that ieee80211_sta didn't have any iwl_mvm_sta object initialized and that caused the crash mentioned earlier when we got Rx on that station. Cc: stable@vger.kernel.org Fixes: 57974a55d995 ("wifi: iwlwifi: mvm: refactor iwl_mvm_mac_sta_state_common()") Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206175739.1f76c44b2486.I6a00955e2842f15f0a089db2f834adb9d10fbe35@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +++ drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 3447d67a8b311..53e26c3c3a9af 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3687,6 +3687,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm, NL80211_TDLS_SETUP); } + if (ret) + return ret; + for_each_sta_active_link(vif, sta, link_sta, i) link_sta->agg.max_rc_amsdu_len = 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 886d000985287..af15d470c69bd 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -505,6 +505,10 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue, return false; mvm_sta = iwl_mvm_sta_from_mac80211(sta); + + if (WARN_ON_ONCE(!mvm_sta->dup_data)) + return false; + dup_data = &mvm_sta->dup_data[queue]; /* -- GitLab From c14f09f010cc569ae7e2f6ef02374f6bfef9917e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 8 Feb 2024 12:37:42 +0000 Subject: [PATCH 0847/1405] ASoC: cs35l56: Fix deadlock in ASP1 mixer register initialization Rewrite the handling of ASP1 TX mixer mux initialization to prevent a deadlock during component_remove(). The firmware can overwrite the ASP1 TX mixer registers with system-specific settings. This is mainly for hardware that uses the ASP as a chip-to-chip link controlled by the firmware. Because of this the driver cannot know the starting state of the ASP1 mixer muxes until the firmware has been downloaded and rebooted. The original workaround for this was to queue a work function from the dsp_work() job. This work then read the register values (populating the regmap cache the first time around) and then called snd_soc_dapm_mux_update_power(). The problem with this is that it was ultimately triggered by cs35l56_component_probe() queueing dsp_work, which meant that it would be running in parallel with the rest of the ASoC component and card initialization. To prevent accessing DAPM before it was fully initialized the work function took the card mutex. But this would deadlock if cs35l56_component_remove() was called before the work job had completed, because ASoC calls component_remove() with the card mutex held. This new version removes the work function. Instead the regmap cache and DAPM mux widgets are initialized the first time any of the associated ALSA controls is read or written. Signed-off-by: Richard Fitzgerald Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Link: https://lore.kernel.org/r/20240208123742.1278104-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 153 +++++++++++++++++-------------------- sound/soc/codecs/cs35l56.h | 2 +- 2 files changed, 73 insertions(+), 82 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index ebed5ab1245b4..98d3957c66e78 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -68,63 +68,7 @@ static const char * const cs35l56_asp1_mux_control_names[] = { "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source" }; -static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); - struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); - struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; - int index = e->shift_l; - unsigned int addr, val; - int ret; - - /* Wait for mux to be initialized */ - cs35l56_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->mux_init_work); - - addr = cs35l56_asp1_mixer_regs[index]; - ret = regmap_read(cs35l56->base.regmap, addr, &val); - if (ret) - return ret; - - val &= CS35L56_ASP_TXn_SRC_MASK; - ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val); - - return 0; -} - -static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) -{ - struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); - struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); - struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); - struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; - int item = ucontrol->value.enumerated.item[0]; - int index = e->shift_l; - unsigned int addr, val; - bool changed; - int ret; - - /* Wait for mux to be initialized */ - cs35l56_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->mux_init_work); - - addr = cs35l56_asp1_mixer_regs[index]; - val = snd_soc_enum_item_to_val(e, item); - - ret = regmap_update_bits_check(cs35l56->base.regmap, addr, - CS35L56_ASP_TXn_SRC_MASK, val, &changed); - if (ret) - return ret; - - if (changed) - snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); - - return changed; -} - -static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l56) +static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private *cs35l56) { struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component); const char *prefix = cs35l56->component->name_prefix; @@ -135,13 +79,19 @@ static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l5 unsigned int val[4]; int i, item, ret; + if (cs35l56->asp1_mixer_widgets_initialized) + return 0; + /* * Resume so we can read the registers from silicon if the regmap * cache has not yet been populated. */ ret = pm_runtime_resume_and_get(cs35l56->base.dev); if (ret < 0) - return; + return ret; + + /* Wait for firmware download and reboot */ + cs35l56_wait_dsp_ready(cs35l56); ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, val, ARRAY_SIZE(val)); @@ -151,12 +101,9 @@ static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l5 if (ret) { dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret); - return; + return ret; } - snd_soc_card_mutex_lock(dapm->card); - WARN_ON(!dapm->card->instantiated); - for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) { name = cs35l56_asp1_mux_control_names[i]; @@ -176,16 +123,65 @@ static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l5 snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); } - snd_soc_card_mutex_unlock(dapm->card); + cs35l56->asp1_mixer_widgets_initialized = true; + + return 0; } -static void cs35l56_mux_init_work(struct work_struct *work) +static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { - struct cs35l56_private *cs35l56 = container_of(work, - struct cs35l56_private, - mux_init_work); + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int index = e->shift_l; + unsigned int addr, val; + int ret; - cs35l56_mark_asp1_mixer_widgets_dirty(cs35l56); + ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56); + if (ret) + return ret; + + addr = cs35l56_asp1_mixer_regs[index]; + ret = regmap_read(cs35l56->base.regmap, addr, &val); + if (ret) + return ret; + + val &= CS35L56_ASP_TXn_SRC_MASK; + ucontrol->value.enumerated.item[0] = snd_soc_enum_val_to_item(e, val); + + return 0; +} + +static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol); + struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); + struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); + struct soc_enum *e = (struct soc_enum *)kcontrol->private_value; + int item = ucontrol->value.enumerated.item[0]; + int index = e->shift_l; + unsigned int addr, val; + bool changed; + int ret; + + ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56); + if (ret) + return ret; + + addr = cs35l56_asp1_mixer_regs[index]; + val = snd_soc_enum_item_to_val(e, item); + + ret = regmap_update_bits_check(cs35l56->base.regmap, addr, + CS35L56_ASP_TXn_SRC_MASK, val, &changed); + if (ret) + return ret; + + if (changed) + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + + return changed; } static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0); @@ -936,14 +932,6 @@ static void cs35l56_dsp_work(struct work_struct *work) else cs35l56_patch(cs35l56, firmware_missing); - - /* - * Set starting value of ASP1 mux widgets. Updating a mux takes - * the DAPM mutex. Post this to a separate job so that DAPM - * power-up can wait for dsp_work to complete without deadlocking - * on the DAPM mutex. - */ - queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); err: pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); @@ -989,6 +977,13 @@ static int cs35l56_component_probe(struct snd_soc_component *component) debugfs_create_bool("can_hibernate", 0444, debugfs_root, &cs35l56->base.can_hibernate); debugfs_create_bool("fw_patched", 0444, debugfs_root, &cs35l56->base.fw_patched); + /* + * The widgets for the ASP1TX mixer can't be initialized + * until the firmware has been downloaded and rebooted. + */ + regcache_drop_region(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX4_INPUT); + cs35l56->asp1_mixer_widgets_initialized = false; + queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work); return 0; @@ -999,7 +994,6 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->mux_init_work); if (cs35l56->dsp.cs_dsp.booted) wm_adsp_power_down(&cs35l56->dsp); @@ -1070,10 +1064,8 @@ int cs35l56_system_suspend(struct device *dev) dev_dbg(dev, "system_suspend\n"); - if (cs35l56->component) { + if (cs35l56->component) flush_work(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->mux_init_work); - } /* * The interrupt line is normally shared, but after we start suspending @@ -1224,7 +1216,6 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) return -ENOMEM; INIT_WORK(&cs35l56->dsp_work, cs35l56_dsp_work); - INIT_WORK(&cs35l56->mux_init_work, cs35l56_mux_init_work); dsp = &cs35l56->dsp; cs35l56_init_cs_dsp(&cs35l56->base, &dsp->cs_dsp); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 596b141e3f961..b000e7365e406 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -34,7 +34,6 @@ struct cs35l56_private { struct wm_adsp dsp; /* must be first member */ struct cs35l56_base base; struct work_struct dsp_work; - struct work_struct mux_init_work; struct workqueue_struct *dsp_wq; struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; @@ -52,6 +51,7 @@ struct cs35l56_private { u8 asp_slot_count; bool tdm_mode; bool sysclk_set; + bool asp1_mixer_widgets_initialized; u8 old_sdw_clock_scale; }; -- GitLab From d7332c4a4f1a7d16f054c6357fb65c597b6a86a7 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Thu, 8 Feb 2024 15:34:32 +0200 Subject: [PATCH 0848/1405] ASoC: SOF: ipc3-topology: Fix pipeline tear down logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the change in the widget free logic to power down the cores only when the scheduler widgets are freed, we need to ensure that the scheduler widget is freed only after all the widgets associated with the scheduler are freed. This is to ensure that the secondary core that the scheduler is scheduled to run on is kept powered on until all widgets that need them are in use. While this works well for dynamic pipelines, in the case of static pipelines the current logic does not take this into account and frees all widgets in the order they occur in the widget_list. So, modify this to ensure that the scheduler widgets are freed only after all other types of widgets in the widget_list are freed. Link: https://github.com/thesofproject/linux/issues/4807 Fixes: 31ed8da1c8e5 ("ASoC: SOF: sof-audio: Modify logic for enabling/disabling topology cores") Signed-off-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20240208133432.1688-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3-topology.c | 55 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index a8832a1c1a244..d47698f4be2de 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2360,27 +2360,16 @@ static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev) return 0; } -/* - * For older firmware, this function doesn't free widgets for static pipelines during suspend. - * It only resets use_count for all widgets. - */ -static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify) +static int sof_ipc3_free_widgets_in_list(struct snd_sof_dev *sdev, bool include_scheduler, + bool *dyn_widgets, bool verify) { struct sof_ipc_fw_version *v = &sdev->fw_ready.version; struct snd_sof_widget *swidget; - struct snd_sof_route *sroute; - bool dyn_widgets = false; int ret; - /* - * This function is called during suspend and for one-time topology verification during - * first boot. In both cases, there is no need to protect swidget->use_count and - * sroute->setup because during suspend all running streams are suspended and during - * topology loading the sound card unavailable to open PCMs. - */ list_for_each_entry(swidget, &sdev->widget_list, list) { if (swidget->dynamic_pipeline_widget) { - dyn_widgets = true; + *dyn_widgets = true; continue; } @@ -2395,11 +2384,49 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif continue; } + if (include_scheduler && swidget->id != snd_soc_dapm_scheduler) + continue; + + if (!include_scheduler && swidget->id == snd_soc_dapm_scheduler) + continue; + ret = sof_widget_free(sdev, swidget); if (ret < 0) return ret; } + return 0; +} + +/* + * For older firmware, this function doesn't free widgets for static pipelines during suspend. + * It only resets use_count for all widgets. + */ +static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verify) +{ + struct sof_ipc_fw_version *v = &sdev->fw_ready.version; + struct snd_sof_widget *swidget; + struct snd_sof_route *sroute; + bool dyn_widgets = false; + int ret; + + /* + * This function is called during suspend and for one-time topology verification during + * first boot. In both cases, there is no need to protect swidget->use_count and + * sroute->setup because during suspend all running streams are suspended and during + * topology loading the sound card unavailable to open PCMs. Do not free the scheduler + * widgets yet so that the secondary cores do not get powered down before all the widgets + * associated with the scheduler are freed. + */ + ret = sof_ipc3_free_widgets_in_list(sdev, false, &dyn_widgets, verify); + if (ret < 0) + return ret; + + /* free all the scheduler widgets now */ + ret = sof_ipc3_free_widgets_in_list(sdev, true, &dyn_widgets, verify); + if (ret < 0) + return ret; + /* * Tear down all pipelines associated with PCMs that did not get suspended * and unset the prepare flag so that they can be set up again during resume. -- GitLab From 8c427cc2fa73684ea140999e121b7b6c1c717632 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 24 Jan 2024 00:02:34 +0900 Subject: [PATCH 0849/1405] tracing/probes: Fix to show a parse error for bad type for $comm Fix to show a parse error for bad type (non-string) for $comm/$COMM and immediate-string. With this fix, error_log file shows appropriate error message as below. /sys/kernel/tracing # echo 'p vfs_read $comm:u32' >> kprobe_events sh: write error: Invalid argument /sys/kernel/tracing # echo 'p vfs_read \"hoge":u32' >> kprobe_events sh: write error: Invalid argument /sys/kernel/tracing # cat error_log [ 30.144183] trace_kprobe: error: $comm and immediate-string only accepts string type Command: p vfs_read $comm:u32 ^ [ 62.618500] trace_kprobe: error: $comm and immediate-string only accepts string type Command: p vfs_read \"hoge":u32 ^ Link: https://lore.kernel.org/all/170602215411.215583.2238016352271091852.stgit@devnote2/ Fixes: 3dd1f7f24f8c ("tracing: probeevent: Fix to make the type of $comm string") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 7 +++++-- kernel/trace/trace_probe.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 4dc74d73fc1df..c6da5923e5b9f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1159,9 +1159,12 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, if (!(ctx->flags & TPARG_FL_TEVENT) && (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 || strncmp(arg, "\\\"", 2) == 0)) { - /* The type of $comm must be "string", and not an array. */ - if (parg->count || (t && strcmp(t, "string"))) + /* The type of $comm must be "string", and not an array type. */ + if (parg->count || (t && strcmp(t, "string"))) { + trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), + NEED_STRING_TYPE); goto out; + } parg->type = find_fetch_type("string", ctx->flags); } else parg->type = find_fetch_type(t, ctx->flags); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 850d9ecb6765a..c1877d0182691 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -515,7 +515,8 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(BAD_HYPHEN, "Failed to parse single hyphen. Forgot '>'?"), \ C(NO_BTF_FIELD, "This field is not found."), \ C(BAD_BTF_TID, "Failed to get BTF type info."),\ - C(BAD_TYPE4STR, "This type does not fit for string."), + C(BAD_TYPE4STR, "This type does not fit for string."),\ + C(NEED_STRING_TYPE, "$comm and immediate-string only accepts string type"), #undef C #define C(a, b) TP_ERR_##a -- GitLab From 9a571c1e275cedacd48c66a6bddd0c23f1dffdbf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 24 Jan 2024 00:03:02 +0900 Subject: [PATCH 0850/1405] tracing/probes: Fix to set arg size and fmt after setting type from BTF Since the BTF type setting updates probe_arg::type, the type size calculation and setting print-fmt should be done after that. Without this fix, the argument size and print-fmt can be wrong. Link: https://lore.kernel.org/all/170602218196.215583.6417859469540955777.stgit@devnote2/ Fixes: b576e09701c7 ("tracing/probes: Support function parameters if BTF is available") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index c6da5923e5b9f..34289f9c67076 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1172,18 +1172,6 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, trace_probe_log_err(ctx->offset + (t ? (t - arg) : 0), BAD_TYPE); goto out; } - parg->offset = *size; - *size += parg->type->size * (parg->count ?: 1); - - ret = -ENOMEM; - if (parg->count) { - len = strlen(parg->type->fmttype) + 6; - parg->fmt = kmalloc(len, GFP_KERNEL); - if (!parg->fmt) - goto out; - snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, - parg->count); - } code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL); if (!code) @@ -1207,6 +1195,19 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, goto fail; } } + parg->offset = *size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) { + ret = -ENOMEM; + goto out; + } + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, + parg->count); + } ret = -EINVAL; /* Store operation */ -- GitLab From 9efd24ec5599ed485b7c4d9aeb731141f6285167 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Tue, 19 Sep 2023 09:28:23 +0800 Subject: [PATCH 0851/1405] kprobes: Remove unnecessary initial values of variables ri and sym is assigned first, so it does not need to initialize the assignment. Link: https://lore.kernel.org/all/20230919012823.7815-1-zeming@nfschina.com/ Signed-off-by: Li zeming Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a0ee40bf66c..9d9095e817928 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1993,7 +1993,7 @@ NOKPROBE_SYMBOL(__kretprobe_find_ret_addr); unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, struct llist_node **cur) { - struct kretprobe_instance *ri = NULL; + struct kretprobe_instance *ri; kprobe_opcode_t *ret; if (WARN_ON_ONCE(!cur)) @@ -2802,7 +2802,7 @@ static int show_kprobe_addr(struct seq_file *pi, void *v) { struct hlist_head *head; struct kprobe *p, *kp; - const char *sym = NULL; + const char *sym; unsigned int i = *(loff_t *) v; unsigned long offset = 0; char *modname, namebuf[KSYM_NAME_LEN]; -- GitLab From 3ca8fbabcceb8bfe44f7f50640092fd8f1de375c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Feb 2024 16:02:50 +0000 Subject: [PATCH 0852/1405] Revert "kobject: Remove redundant checks for whether ktype is NULL" This reverts commit 1b28cb81dab7c1eedc6034206f4e8d644046ad31. It is reported to cause problems, so revert it for now until the root cause can be found. Reported-by: kernel test robot Fixes: 1b28cb81dab7 ("kobject: Remove redundant checks for whether ktype is NULL") Cc: Zhen Lei Closes: https://lore.kernel.org/oe-lkp/202402071403.e302e33a-oliver.sang@intel.com Link: https://lore.kernel.org/r/2024020849-consensus-length-6264@gregkh Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/kobject.c b/lib/kobject.c index 59dbcbdb1c916..72fa20f405f15 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj) if (error) return error; - error = sysfs_create_groups(kobj, ktype->default_groups); - if (error) { - sysfs_remove_dir(kobj); - return error; + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } } /* @@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj) sd = kobj->sd; ktype = get_ktype(kobj); - sysfs_remove_groups(kobj, ktype->default_groups); + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { @@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj) pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); + if (t && !t->release) + pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", + kobject_name(kobj), kobj); + /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", @@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj) parent = NULL; } - if (t->release) { + if (t && t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); - } else { - pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", - kobject_name(kobj), kobj); } /* free name if we allocated it */ @@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa { const struct kobj_ns_type_operations *ops = NULL; - if (parent && parent->ktype->child_ns_type) + if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; -- GitLab From 45be0882c5f91e1b92e645001dd1a53b3bd58c97 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 5 Feb 2024 14:43:17 -0600 Subject: [PATCH 0853/1405] smb3: add missing null server pointer check Address static checker warning in cifs_ses_get_chan_index(): warn: variable dereferenced before check 'server' To be consistent, and reduce risk, we should add another check for null server pointer. Fixes: 88675b22d34e ("cifs: do not search for channel if server is terminating") Reported-by: Dan Carpenter Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/sess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ed4bd88dd528a..476d54fceb50f 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -76,7 +76,7 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, unsigned int i; /* if the channel is waiting for termination */ - if (server->terminate) + if (server && server->terminate) return CIFS_INVAL_CHAN_INDEX; for (i = 0; i < ses->chan_count; i++) { -- GitLab From 55c7788c37242702868bfac7861cdf0c358d6c3d Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 2 Feb 2024 12:38:24 -0300 Subject: [PATCH 0854/1405] smb: client: set correct d_type for reparse points under DFS mounts Send query dir requests with an info level of SMB_FIND_FILE_FULL_DIRECTORY_INFO rather than SMB_FIND_FILE_DIRECTORY_INFO when the client is generating its own inode numbers (e.g. noserverino) so that reparse tags still can be parsed directly from the responses, but server won't send UniqueId (server inode number) Signed-off-by: Paulo Alcantara Signed-off-by: Steve French --- fs/smb/client/readdir.c | 15 ++++++++------- fs/smb/client/smb2pdu.c | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 3b1b01d10f7d7..b520eea7bfce8 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -307,14 +307,16 @@ cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info, } static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr, - SEARCH_ID_FULL_DIR_INFO *info, + const void *info, struct cifs_sb_info *cifs_sb) { + const FILE_FULL_DIRECTORY_INFO *di = info; + __dir_info_to_fattr(fattr, info); - /* See MS-FSCC 2.4.19 FileIdFullDirectoryInformation */ + /* See MS-FSCC 2.4.14, 2.4.19 */ if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_cifstag = le32_to_cpu(info->EaSize); + fattr->cf_cifstag = le32_to_cpu(di->EaSize); cifs_fill_common_info(fattr, cifs_sb); } @@ -396,7 +398,7 @@ _initiate_cifs_search(const unsigned int xid, struct file *file, } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifsFile->srch_inf.info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; } else /* not srvinos - BB fixme add check for backlevel? */ { - cifsFile->srch_inf.info_level = SMB_FIND_FILE_DIRECTORY_INFO; + cifsFile->srch_inf.info_level = SMB_FIND_FILE_FULL_DIRECTORY_INFO; } search_flags = CIFS_SEARCH_CLOSE_AT_END | CIFS_SEARCH_RETURN_RESUME; @@ -987,10 +989,9 @@ static int cifs_filldir(char *find_entry, struct file *file, (FIND_FILE_STANDARD_INFO *)find_entry, cifs_sb); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: case SMB_FIND_FILE_ID_FULL_DIR_INFO: - cifs_fulldir_info_to_fattr(&fattr, - (SEARCH_ID_FULL_DIR_INFO *)find_entry, - cifs_sb); + cifs_fulldir_info_to_fattr(&fattr, find_entry, cifs_sb); break; default: cifs_dir_info_to_fattr(&fattr, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c58fa44dd6b06..4085ce27fd388 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -5206,6 +5206,9 @@ int SMB2_query_directory_init(const unsigned int xid, case SMB_FIND_FILE_POSIX_INFO: req->FileInformationClass = SMB_FIND_FILE_POSIX_INFO; break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + req->FileInformationClass = FILE_FULL_DIRECTORY_INFORMATION; + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", info_level); @@ -5275,6 +5278,9 @@ smb2_parse_query_directory(struct cifs_tcon *tcon, /* note that posix payload are variable size */ info_buf_size = sizeof(struct smb2_posix_info); break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + info_buf_size = sizeof(FILE_FULL_DIRECTORY_INFO); + break; default: cifs_tcon_dbg(VFS, "info level %u isn't supported\n", srch_inf->info_level); -- GitLab From 2a427b49d02995ea4a6ff93a1432c40fa4d36821 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Nov 2023 12:25:56 -1000 Subject: [PATCH 0855/1405] blk-iocost: Fix an UBSAN shift-out-of-bounds warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iocg_kick_delay() is called from a CPU different than the one which set the delay, @now may be in the past of @iocg->delay_at leading to the following warning: UBSAN: shift-out-of-bounds in block/blk-iocost.c:1359:23 shift exponent 18446744073709 is too large for 64-bit type 'u64' (aka 'unsigned long long') ... Call Trace: dump_stack_lvl+0x79/0xc0 __ubsan_handle_shift_out_of_bounds+0x2ab/0x300 iocg_kick_delay+0x222/0x230 ioc_rqos_merge+0x1d7/0x2c0 __rq_qos_merge+0x2c/0x80 bio_attempt_back_merge+0x83/0x190 blk_attempt_plug_merge+0x101/0x150 blk_mq_submit_bio+0x2b1/0x720 submit_bio_noacct_nocheck+0x320/0x3e0 __swap_writepage+0x2ab/0x9d0 The underflow itself doesn't really affect the behavior in any meaningful way; however, the past timestamp may exaggerate the delay amount calculated later in the code, which shouldn't be a material problem given the nature of the delay mechanism. If @now is in the past, this CPU is racing another CPU which recently set up the delay and there's nothing this CPU can contribute w.r.t. the delay. Let's bail early from iocg_kick_delay() in such cases. Reported-by: Breno Leitão Signed-off-by: Tejun Heo Fixes: 5160a5a53c0c ("blk-iocost: implement delay adjustment hysteresis") Link: https://lore.kernel.org/r/ZVvc9L_CYk5LO1fT@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index c8beec6d7df08..04d44f0bcbc85 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1353,6 +1353,13 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) lockdep_assert_held(&iocg->waitq.lock); + /* + * If the delay is set by another CPU, we may be in the past. No need to + * change anything if so. This avoids decay calculation underflow. + */ + if (time_before64(now->now, iocg->delay_at)) + return false; + /* calculate the current delay in effect - 1/2 every second */ tdelta = now->now - iocg->delay_at; if (iocg->delay) -- GitLab From c23de7ceae59e4ca5894c3ecf4f785c50c0fa428 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 5 Feb 2024 18:51:26 +0100 Subject: [PATCH 0856/1405] docs: kernel_feat.py: fix build error for missing files If the directory passed to the '.. kernel-feat::' directive does not exist or the get_feat.pl script does not find any files to extract features from, Sphinx will report the following error: Sphinx parallel build error: UnboundLocalError: local variable 'fname' referenced before assignment make[2]: *** [Documentation/Makefile:102: htmldocs] Error 2 This is due to how I changed the script in c48a7c44a1d0 ("docs: kernel_feat.py: fix potential command injection"). Before that, the filename passed along to self.nestedParse() in this case was weirdly just the whole get_feat.pl invocation. We can fix it by doing what kernel_abi.py does -- just pass self.arguments[0] as 'fname'. Fixes: c48a7c44a1d0 ("docs: kernel_feat.py: fix potential command injection") Cc: Justin Forbes Cc: Salvatore Bonaccorso Cc: Jani Nikula Cc: Mauro Carvalho Chehab Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum Link: https://lore.kernel.org/r/20240205175133.774271-2-vegard.nossum@oracle.com Signed-off-by: Jonathan Corbet --- Documentation/sphinx/kernel_feat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py index b9df61eb45013..03ace5f01b5c0 100644 --- a/Documentation/sphinx/kernel_feat.py +++ b/Documentation/sphinx/kernel_feat.py @@ -109,7 +109,7 @@ class KernelFeat(Directive): else: out_lines += line + "\n" - nodeList = self.nestedParse(out_lines, fname) + nodeList = self.nestedParse(out_lines, self.arguments[0]) return nodeList def nestedParse(self, lines, fname): -- GitLab From 4ce6e2db00de8103a0687fb0f65fd17124a51aaa Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 29 Jan 2024 16:52:50 +0800 Subject: [PATCH 0857/1405] virtio-blk: Ensure no requests in virtqueues before deleting vqs. Ensure no remaining requests in virtqueues before resetting vdev and deleting virtqueues. Otherwise these requests will never be completed. It may cause the system to become unresponsive. Function blk_mq_quiesce_queue() can ensure that requests have become in_flight status, but it cannot guarantee that requests have been processed by the device. Virtqueues should never be deleted before all requests become complete status. Function blk_mq_freeze_queue() ensure that all requests in virtqueues become complete status. And no requests can enter in virtqueues. Signed-off-by: Yi Sun Reviewed-by: Stefan Hajnoczi Link: https://lore.kernel.org/r/20240129085250.1550594-1-yi.sun@unisoc.com Signed-off-by: Jens Axboe --- drivers/block/virtio_blk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5bf98fd6a651a..2bf14a0e2815f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1593,14 +1593,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ virtio_reset_device(vdev); /* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work); - blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs); @@ -1618,7 +1619,7 @@ static int virtblk_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif -- GitLab From 53c0441dd2c44ee93fddb5473885fd41e4bc2361 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 7 Feb 2024 12:59:02 +0100 Subject: [PATCH 0858/1405] dpll: fix possible deadlock during netlink dump operation Recently, I've been hitting following deadlock warning during dpll pin dump: [52804.637962] ====================================================== [52804.638536] WARNING: possible circular locking dependency detected [52804.639111] 6.8.0-rc2jiri+ #1 Not tainted [52804.639529] ------------------------------------------------------ [52804.640104] python3/2984 is trying to acquire lock: [52804.640581] ffff88810e642678 (nlk_cb_mutex-GENERIC){+.+.}-{3:3}, at: netlink_dump+0xb3/0x780 [52804.641417] but task is already holding lock: [52804.642010] ffffffff83bde4c8 (dpll_lock){+.+.}-{3:3}, at: dpll_lock_dumpit+0x13/0x20 [52804.642747] which lock already depends on the new lock. [52804.643551] the existing dependency chain (in reverse order) is: [52804.644259] -> #1 (dpll_lock){+.+.}-{3:3}: [52804.644836] lock_acquire+0x174/0x3e0 [52804.645271] __mutex_lock+0x119/0x1150 [52804.645723] dpll_lock_dumpit+0x13/0x20 [52804.646169] genl_start+0x266/0x320 [52804.646578] __netlink_dump_start+0x321/0x450 [52804.647056] genl_family_rcv_msg_dumpit+0x155/0x1e0 [52804.647575] genl_rcv_msg+0x1ed/0x3b0 [52804.648001] netlink_rcv_skb+0xdc/0x210 [52804.648440] genl_rcv+0x24/0x40 [52804.648831] netlink_unicast+0x2f1/0x490 [52804.649290] netlink_sendmsg+0x36d/0x660 [52804.649742] __sock_sendmsg+0x73/0xc0 [52804.650165] __sys_sendto+0x184/0x210 [52804.650597] __x64_sys_sendto+0x72/0x80 [52804.651045] do_syscall_64+0x6f/0x140 [52804.651474] entry_SYSCALL_64_after_hwframe+0x46/0x4e [52804.652001] -> #0 (nlk_cb_mutex-GENERIC){+.+.}-{3:3}: [52804.652650] check_prev_add+0x1ae/0x1280 [52804.653107] __lock_acquire+0x1ed3/0x29a0 [52804.653559] lock_acquire+0x174/0x3e0 [52804.653984] __mutex_lock+0x119/0x1150 [52804.654423] netlink_dump+0xb3/0x780 [52804.654845] __netlink_dump_start+0x389/0x450 [52804.655321] genl_family_rcv_msg_dumpit+0x155/0x1e0 [52804.655842] genl_rcv_msg+0x1ed/0x3b0 [52804.656272] netlink_rcv_skb+0xdc/0x210 [52804.656721] genl_rcv+0x24/0x40 [52804.657119] netlink_unicast+0x2f1/0x490 [52804.657570] netlink_sendmsg+0x36d/0x660 [52804.658022] __sock_sendmsg+0x73/0xc0 [52804.658450] __sys_sendto+0x184/0x210 [52804.658877] __x64_sys_sendto+0x72/0x80 [52804.659322] do_syscall_64+0x6f/0x140 [52804.659752] entry_SYSCALL_64_after_hwframe+0x46/0x4e [52804.660281] other info that might help us debug this: [52804.661077] Possible unsafe locking scenario: [52804.661671] CPU0 CPU1 [52804.662129] ---- ---- [52804.662577] lock(dpll_lock); [52804.662924] lock(nlk_cb_mutex-GENERIC); [52804.663538] lock(dpll_lock); [52804.664073] lock(nlk_cb_mutex-GENERIC); [52804.664490] The issue as follows: __netlink_dump_start() calls control->start(cb) with nlk->cb_mutex held. In control->start(cb) the dpll_lock is taken. Then nlk->cb_mutex is released and taken again in netlink_dump(), while dpll_lock still being held. That leads to ABBA deadlock when another CPU races with the same operation. Fix this by moving dpll_lock taking into dumpit() callback which ensures correct lock taking order. Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Jiri Pirko Reviewed-by: Arkadiusz Kubalewski Link: https://lore.kernel.org/r/20240207115902.371649-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 4 ---- drivers/dpll/dpll_netlink.c | 20 ++++++-------------- drivers/dpll/dpll_nl.c | 4 ---- drivers/dpll/dpll_nl.h | 2 -- 4 files changed, 6 insertions(+), 24 deletions(-) diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index b14aed18065f4..3dcc9ece272aa 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -384,8 +384,6 @@ operations: - type dump: - pre: dpll-lock-dumpit - post: dpll-unlock-dumpit reply: *dev-attrs - @@ -473,8 +471,6 @@ operations: - fractional-frequency-offset dump: - pre: dpll-lock-dumpit - post: dpll-unlock-dumpit request: attributes: - id diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 314bb37754651..4ca9ad16cd957 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1199,6 +1199,7 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) unsigned long i; int ret = 0; + mutex_lock(&dpll_lock); xa_for_each_marked_start(&dpll_pin_xa, i, pin, DPLL_REGISTERED, ctx->idx) { if (!dpll_pin_available(pin)) @@ -1218,6 +1219,8 @@ int dpll_nl_pin_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } genlmsg_end(skb, hdr); } + mutex_unlock(&dpll_lock); + if (ret == -EMSGSIZE) { ctx->idx = i; return skb->len; @@ -1373,6 +1376,7 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) unsigned long i; int ret = 0; + mutex_lock(&dpll_lock); xa_for_each_marked_start(&dpll_device_xa, i, dpll, DPLL_REGISTERED, ctx->idx) { hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, @@ -1389,6 +1393,8 @@ int dpll_nl_device_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } genlmsg_end(skb, hdr); } + mutex_unlock(&dpll_lock); + if (ret == -EMSGSIZE) { ctx->idx = i; return skb->len; @@ -1439,20 +1445,6 @@ dpll_unlock_doit(const struct genl_split_ops *ops, struct sk_buff *skb, mutex_unlock(&dpll_lock); } -int dpll_lock_dumpit(struct netlink_callback *cb) -{ - mutex_lock(&dpll_lock); - - return 0; -} - -int dpll_unlock_dumpit(struct netlink_callback *cb) -{ - mutex_unlock(&dpll_lock); - - return 0; -} - int dpll_pin_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c index eaee5be7aa642..1e95f5397cfce 100644 --- a/drivers/dpll/dpll_nl.c +++ b/drivers/dpll/dpll_nl.c @@ -95,9 +95,7 @@ static const struct genl_split_ops dpll_nl_ops[] = { }, { .cmd = DPLL_CMD_DEVICE_GET, - .start = dpll_lock_dumpit, .dumpit = dpll_nl_device_get_dumpit, - .done = dpll_unlock_dumpit, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, }, { @@ -129,9 +127,7 @@ static const struct genl_split_ops dpll_nl_ops[] = { }, { .cmd = DPLL_CMD_PIN_GET, - .start = dpll_lock_dumpit, .dumpit = dpll_nl_pin_get_dumpit, - .done = dpll_unlock_dumpit, .policy = dpll_pin_get_dump_nl_policy, .maxattr = DPLL_A_PIN_ID, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, diff --git a/drivers/dpll/dpll_nl.h b/drivers/dpll/dpll_nl.h index 92d4c9c4f788d..f491262bee4f0 100644 --- a/drivers/dpll/dpll_nl.h +++ b/drivers/dpll/dpll_nl.h @@ -30,8 +30,6 @@ dpll_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, void dpll_pin_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); -int dpll_lock_dumpit(struct netlink_callback *cb); -int dpll_unlock_dumpit(struct netlink_callback *cb); int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info); int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info); -- GitLab From aa1eec2f546f2afa8c98ec41e5d8ee488165d685 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 6 Feb 2024 17:43:28 +0100 Subject: [PATCH 0859/1405] net/mlx5: DPLL, Fix possible use after free after delayed work timer triggers I managed to hit following use after free warning recently: [ 2169.711665] ================================================================== [ 2169.714009] BUG: KASAN: slab-use-after-free in __run_timers.part.0+0x179/0x4c0 [ 2169.716293] Write of size 8 at addr ffff88812b326a70 by task swapper/4/0 [ 2169.719022] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 6.8.0-rc2jiri+ #2 [ 2169.720974] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 2169.722457] Call Trace: [ 2169.722756] [ 2169.723024] dump_stack_lvl+0x58/0xb0 [ 2169.723417] print_report+0xc5/0x630 [ 2169.723807] ? __virt_addr_valid+0x126/0x2b0 [ 2169.724268] kasan_report+0xbe/0xf0 [ 2169.724667] ? __run_timers.part.0+0x179/0x4c0 [ 2169.725116] ? __run_timers.part.0+0x179/0x4c0 [ 2169.725570] __run_timers.part.0+0x179/0x4c0 [ 2169.726003] ? call_timer_fn+0x320/0x320 [ 2169.726404] ? lock_downgrade+0x3a0/0x3a0 [ 2169.726820] ? kvm_clock_get_cycles+0x14/0x20 [ 2169.727257] ? ktime_get+0x92/0x150 [ 2169.727630] ? lapic_next_deadline+0x35/0x60 [ 2169.728069] run_timer_softirq+0x40/0x80 [ 2169.728475] __do_softirq+0x1a1/0x509 [ 2169.728866] irq_exit_rcu+0x95/0xc0 [ 2169.729241] sysvec_apic_timer_interrupt+0x6b/0x80 [ 2169.729718] [ 2169.729993] [ 2169.730259] asm_sysvec_apic_timer_interrupt+0x16/0x20 [ 2169.730755] RIP: 0010:default_idle+0x13/0x20 [ 2169.731190] Code: c0 08 00 00 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 72 ff ff ff cc cc cc cc 8b 05 9a 7f 1f 02 85 c0 7e 07 0f 00 2d cf 69 43 00 fb f4 c3 66 66 2e 0f 1f 84 00 00 00 00 00 65 48 8b 04 25 c0 93 04 00 [ 2169.732759] RSP: 0018:ffff888100dbfe10 EFLAGS: 00000242 [ 2169.733264] RAX: 0000000000000001 RBX: ffff888100d9c200 RCX: ffffffff8241bd62 [ 2169.733925] RDX: ffffed109a848b15 RSI: 0000000000000004 RDI: ffffffff8127ac55 [ 2169.734566] RBP: 0000000000000004 R08: 0000000000000000 R09: ffffed109a848b14 [ 2169.735200] R10: ffff8884d42458a3 R11: 000000000000ba7e R12: ffffffff83d7d3a0 [ 2169.735835] R13: 1ffff110201b7fc6 R14: 0000000000000000 R15: ffff888100d9c200 [ 2169.736478] ? ct_kernel_exit.constprop.0+0xa2/0xc0 [ 2169.736954] ? do_idle+0x285/0x290 [ 2169.737323] default_idle_call+0x63/0x90 [ 2169.737730] do_idle+0x285/0x290 [ 2169.738089] ? arch_cpu_idle_exit+0x30/0x30 [ 2169.738511] ? mark_held_locks+0x1a/0x80 [ 2169.738917] ? lockdep_hardirqs_on_prepare+0x12e/0x200 [ 2169.739417] cpu_startup_entry+0x30/0x40 [ 2169.739825] start_secondary+0x19a/0x1c0 [ 2169.740229] ? set_cpu_sibling_map+0xbd0/0xbd0 [ 2169.740673] secondary_startup_64_no_verify+0x15d/0x16b [ 2169.741179] [ 2169.741686] Allocated by task 1098: [ 2169.742058] kasan_save_stack+0x1c/0x40 [ 2169.742456] kasan_save_track+0x10/0x30 [ 2169.742852] __kasan_kmalloc+0x83/0x90 [ 2169.743246] mlx5_dpll_probe+0xf5/0x3c0 [mlx5_dpll] [ 2169.743730] auxiliary_bus_probe+0x62/0xb0 [ 2169.744148] really_probe+0x127/0x590 [ 2169.744534] __driver_probe_device+0xd2/0x200 [ 2169.744973] device_driver_attach+0x6b/0xf0 [ 2169.745402] bind_store+0x90/0xe0 [ 2169.745761] kernfs_fop_write_iter+0x1df/0x2a0 [ 2169.746210] vfs_write+0x41f/0x790 [ 2169.746579] ksys_write+0xc7/0x160 [ 2169.746947] do_syscall_64+0x6f/0x140 [ 2169.747333] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 2169.748049] Freed by task 1220: [ 2169.748393] kasan_save_stack+0x1c/0x40 [ 2169.748789] kasan_save_track+0x10/0x30 [ 2169.749188] kasan_save_free_info+0x3b/0x50 [ 2169.749621] poison_slab_object+0x106/0x180 [ 2169.750044] __kasan_slab_free+0x14/0x50 [ 2169.750451] kfree+0x118/0x330 [ 2169.750792] mlx5_dpll_remove+0xf5/0x110 [mlx5_dpll] [ 2169.751271] auxiliary_bus_remove+0x2e/0x40 [ 2169.751694] device_release_driver_internal+0x24b/0x2e0 [ 2169.752191] unbind_store+0xa6/0xb0 [ 2169.752563] kernfs_fop_write_iter+0x1df/0x2a0 [ 2169.753004] vfs_write+0x41f/0x790 [ 2169.753381] ksys_write+0xc7/0x160 [ 2169.753750] do_syscall_64+0x6f/0x140 [ 2169.754132] entry_SYSCALL_64_after_hwframe+0x46/0x4e [ 2169.754847] Last potentially related work creation: [ 2169.755315] kasan_save_stack+0x1c/0x40 [ 2169.755709] __kasan_record_aux_stack+0x9b/0xf0 [ 2169.756165] __queue_work+0x382/0x8f0 [ 2169.756552] call_timer_fn+0x126/0x320 [ 2169.756941] __run_timers.part.0+0x2ea/0x4c0 [ 2169.757376] run_timer_softirq+0x40/0x80 [ 2169.757782] __do_softirq+0x1a1/0x509 [ 2169.758387] Second to last potentially related work creation: [ 2169.758924] kasan_save_stack+0x1c/0x40 [ 2169.759322] __kasan_record_aux_stack+0x9b/0xf0 [ 2169.759773] __queue_work+0x382/0x8f0 [ 2169.760156] call_timer_fn+0x126/0x320 [ 2169.760550] __run_timers.part.0+0x2ea/0x4c0 [ 2169.760978] run_timer_softirq+0x40/0x80 [ 2169.761381] __do_softirq+0x1a1/0x509 [ 2169.761998] The buggy address belongs to the object at ffff88812b326a00 which belongs to the cache kmalloc-256 of size 256 [ 2169.763061] The buggy address is located 112 bytes inside of freed 256-byte region [ffff88812b326a00, ffff88812b326b00) [ 2169.764346] The buggy address belongs to the physical page: [ 2169.764866] page:000000000f2b1e89 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12b324 [ 2169.765731] head:000000000f2b1e89 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 2169.766484] anon flags: 0x200000000000840(slab|head|node=0|zone=2) [ 2169.767048] page_type: 0xffffffff() [ 2169.767422] raw: 0200000000000840 ffff888100042b40 0000000000000000 dead000000000001 [ 2169.768183] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [ 2169.768899] page dumped because: kasan: bad access detected [ 2169.769649] Memory state around the buggy address: [ 2169.770116] ffff88812b326900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2169.770805] ffff88812b326980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2169.771485] >ffff88812b326a00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2169.772173] ^ [ 2169.772787] ffff88812b326a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2169.773477] ffff88812b326b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 2169.774160] ================================================================== [ 2169.774845] ================================================================== I didn't manage to reproduce it. Though the issue seems to be obvious. There is a chance that the mlx5_dpll_remove() calls cancel_delayed_work() when the work runs and manages to re-arm itself. In that case, after delay timer triggers next attempt to queue it, it works with freed memory. Fix this by using cancel_delayed_work_sync() instead which makes sure that work is done when it returns. Fixes: 496fd0a26bbf ("mlx5: Implement SyncE support using DPLL infrastructure") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206164328.360313-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index 18fed2b34fb1c..928bf24d4b123 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -389,7 +389,7 @@ static void mlx5_dpll_remove(struct auxiliary_device *adev) struct mlx5_dpll *mdpll = auxiliary_get_drvdata(adev); struct mlx5_core_dev *mdev = mdpll->mdev; - cancel_delayed_work(&mdpll->work); + cancel_delayed_work_sync(&mdpll->work); mlx5_dpll_mdev_netdev_untrack(mdpll, mdev); destroy_workqueue(mdpll->wq); dpll_pin_unregister(mdpll->dpll, mdpll->dpll_pin, -- GitLab From 4e1d71cabb19ec2586827adfc60d68689c68c194 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Feb 2024 14:16:31 -0500 Subject: [PATCH 0860/1405] net/handshake: Fix handshake_req_destroy_test1 Recently, handshake_req_destroy_test1 started failing: Expected handshake_req_destroy_test == req, but handshake_req_destroy_test == 0000000000000000 req == 0000000060f99b40 not ok 11 req_destroy works This is because "sock_release(sock)" was replaced with "fput(filp)" to address a memory leak. Note that sock_release() is synchronous but fput() usually delays the final close and clean-up. The delay is not consequential in the other cases that were changed but handshake_req_destroy_test1 is testing that handshake_req_cancel() followed by closing the file actually does call the ->hp_destroy method. Thus the PTR_EQ test at the end has to be sure that the final close is complete before it checks the pointer. We cannot use a completion here because if ->hp_destroy is never called (ie, there is an API bug) then the test will hang. Reported by: Guenter Roeck Closes: https://lore.kernel.org/netdev/ZcKDd1to4MPANCrn@tissot.1015granger.net/T/#mac5c6299f86799f1c71776f3a07f9c566c7c3c40 Fixes: 4a0f07d71b04 ("net/handshake: Fix memory leak in __sock_create() and sock_alloc_file()") Signed-off-by: Chuck Lever Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/170724699027.91401.7839730697326806733.stgit@oracle-102.nfsv4bat.org Signed-off-by: Jakub Kicinski --- net/handshake/handshake-test.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 16ed7bfd29e4f..34fd1d9b2db86 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test) handshake_req_cancel(sock->sk); /* Act */ - fput(filp); + /* Ensure the close/release/put process has run to + * completion before checking the result. + */ + __fput_sync(filp); /* Assert */ KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req); -- GitLab From 9b0ed890ac2ae233efd8b27d11aee28a19437bb8 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 7 Feb 2024 09:47:36 +0100 Subject: [PATCH 0861/1405] bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not report the XDP capability NETDEV_XDP_ACT_XSK_ZEROCOPY as the bonding driver does not support XDP and AF_XDP in zero-copy mode even if the real NIC drivers do. Note that the driver used to report everything as supported before a device was bonded. Instead of just masking out the zero-copy support from this, have the driver report that no XDP feature is supported until a real device is bonded. This seems to be more truthful as it is the real drivers that decide what XDP features are supported. Fixes: cb9e6e584d58 ("bonding: add xdp_features support") Reported-by: Prashant Batra Link: https://lore.kernel.org/all/CAJ8uoz2ieZCopgqTvQ9ZY6xQgTbujmC6XkMTamhp68O-h_-rLg@mail.gmail.com/T/ Signed-off-by: Magnus Karlsson Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240207084737.20890-1-magnus.karlsson@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4e0600c7b050f..a11748b8d69b4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1819,6 +1819,8 @@ void bond_xdp_set_features(struct net_device *bond_dev) bond_for_each_slave(bond, slave, iter) val &= slave->dev->xdp_features; + val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY; + xdp_set_features_flag(bond_dev, val); } @@ -5909,9 +5911,6 @@ void bond_setup(struct net_device *bond_dev) if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) bond_dev->features |= BOND_XFRM_FEATURES; #endif /* CONFIG_XFRM_OFFLOAD */ - - if (bond_xdp_check(bond)) - bond_dev->xdp_features = NETDEV_XDP_ACT_MASK; } /* Destroy a bonding device. -- GitLab From 4ab18af47a2c2a80ac11674122935700caf80cc6 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 6 Feb 2024 18:17:17 +0200 Subject: [PATCH 0862/1405] devlink: Fix command annotation documentation Command example string is not read as command. Fix command annotation. Fixes: a8ce7b26a51e ("devlink: Expose port function commands to control migratable") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206161717.466653-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/devlink-port.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index e33ad2401ad70..562f46b412744 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -126,7 +126,7 @@ Users may also set the RoCE capability of the function using `devlink port function set roce` command. Users may also set the function as migratable using -'devlink port function set migratable' command. +`devlink port function set migratable` command. Users may also set the IPsec crypto capability of the function using `devlink port function set ipsec_crypto` command. -- GitLab From 02d9009f4e8c27dcf10c3e39bc0666436686a219 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 7 Feb 2024 18:31:10 +0100 Subject: [PATCH 0863/1405] selftests: net: add more missing kernel config The reuseport_addr_any.sh is currently skipping DCCP tests and pmtu.sh is skipping all the FOU/GUE related cases: add the missing options. Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/38d3ca7f909736c1aef56e6244d67c82a9bba6ff.1707326987.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b749addd3640..5e4390cac17ed 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,10 +24,14 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m +CONFIG_IPV6_SIT=y +CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m @@ -62,6 +66,7 @@ CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m +CONFIG_NET_IPIP=y CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m @@ -78,7 +83,6 @@ CONFIG_TLS=m CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m -CONFIG_NET_FOU=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_SCH_INGRESS=m -- GitLab From c0ec2a712daf133d9996a8a1b7ee2d4996080363 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 30 Jan 2024 19:27:40 +0800 Subject: [PATCH 0864/1405] crypto: virtio/akcipher - Fix stack overflow on memcpy sizeof(struct virtio_crypto_akcipher_session_para) is less than sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from stack variable leads stack overflow. Clang reports this issue by commands: make -j CC=clang-14 mrproper >/dev/null 2>&1 make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ virtio_crypto_akcipher_algs.o Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ Cc: Signed-off-by: zhenwei pi Tested-by: Nathan Chancellor # build Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 2621ff8a93764..de53eddf6796b 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); -- GitLab From 17c8e9ac95d81d10e9619a845a68b83c9384be64 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 31 Jan 2024 13:05:13 +0100 Subject: [PATCH 0865/1405] RISC-V: paravirt: steal_time should be static steal_time is not used outside paravirt.c, make it static, as sparse suggested. Fixes: fdf68acccfc6 ("RISC-V: paravirt: Implement steal-time support") Signed-off-by: Andrew Jones Reviewed-by: Atish Patra Signed-off-by: Anup Patel --- arch/riscv/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 8e114f5930cec..15c5d4048db59 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -41,7 +41,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); -DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); +static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); static bool __init has_pv_steal_clock(void) { -- GitLab From 3752219b6007ee0421cc228f442f33b31f85addd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 31 Jan 2024 13:05:14 +0100 Subject: [PATCH 0866/1405] RISC-V: paravirt: Use correct restricted types __le32 and __le64 types should be used with le32_to_cpu() and le64_to_cpu(), as sparse helpfully points out. Fixes: fdf68acccfc6 ("RISC-V: paravirt: Implement steal-time support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401011933.hL9zqmKo-lkp@intel.com/ Signed-off-by: Andrew Jones Reviewed-by: Atish Patra Signed-off-by: Anup Patel --- arch/riscv/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 15c5d4048db59..0d6225fd3194e 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -91,8 +91,8 @@ static int pv_time_cpu_down_prepare(unsigned int cpu) static u64 pv_time_steal_clock(int cpu) { struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu); - u32 sequence; - u64 steal; + __le32 sequence; + __le64 steal; /* * Check the sequence field before and after reading the steal -- GitLab From f072b272aa27d57cf7fe6fdedb30fb50f391974e Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 31 Jan 2024 13:05:15 +0100 Subject: [PATCH 0867/1405] RISC-V: KVM: Use correct restricted types __le32 and __le64 types should be used with le32_to_cpu() and le64_to_cpu() and __user is needed for pointers referencing guest memory, as sparse helpfully points out. Fixes: e9f12b5fff8a ("RISC-V: KVM: Implement SBI STA extension") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401020142.lwFEDK5v-lkp@intel.com/ Signed-off-by: Andrew Jones Reviewed-by: Atish Patra Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_sta.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index 01f09fe8c3b02..d8cf9ca28c616 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -26,8 +26,12 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) { gpa_t shmem = vcpu->arch.sta.shmem; u64 last_steal = vcpu->arch.sta.last_steal; - u32 *sequence_ptr, sequence; - u64 *steal_ptr, steal; + __le32 __user *sequence_ptr; + __le64 __user *steal_ptr; + __le32 sequence_le; + __le64 steal_le; + u32 sequence; + u64 steal; unsigned long hva; gfn_t gfn; @@ -47,22 +51,22 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) return; } - sequence_ptr = (u32 *)(hva + offset_in_page(shmem) + + sequence_ptr = (__le32 __user *)(hva + offset_in_page(shmem) + offsetof(struct sbi_sta_struct, sequence)); - steal_ptr = (u64 *)(hva + offset_in_page(shmem) + + steal_ptr = (__le64 __user *)(hva + offset_in_page(shmem) + offsetof(struct sbi_sta_struct, steal)); - if (WARN_ON(get_user(sequence, sequence_ptr))) + if (WARN_ON(get_user(sequence_le, sequence_ptr))) return; - sequence = le32_to_cpu(sequence); + sequence = le32_to_cpu(sequence_le); sequence += 1; if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr))) return; - if (!WARN_ON(get_user(steal, steal_ptr))) { - steal = le64_to_cpu(steal); + if (!WARN_ON(get_user(steal_le, steal_ptr))) { + steal = le64_to_cpu(steal_le); vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay); steal += vcpu->arch.sta.last_steal - last_steal; WARN_ON(put_user(cpu_to_le64(steal), steal_ptr)); -- GitLab From a8b9cf62ade1bf17261a979fc97e40c2d7842353 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 10 Jan 2024 09:13:06 +0900 Subject: [PATCH 0868/1405] ftrace: Fix DIRECT_CALLS to use SAVE_REGS by default The commit 60c8971899f3 ("ftrace: Make DIRECT_CALLS work WITH_ARGS and !WITH_REGS") changed DIRECT_CALLS to use SAVE_ARGS when there are multiple ftrace_ops at the same function, but since the x86 only support to jump to direct_call from ftrace_regs_caller, when we set the function tracer on the same target function on x86, ftrace-direct does not work as below (this actually works on arm64.) At first, insmod ftrace-direct.ko to put a direct_call on 'wake_up_process()'. # insmod kernel/samples/ftrace/ftrace-direct.ko # less trace ... -0 [006] ..s1. 564.686958: my_direct_func: waking up rcu_preempt-17 -0 [007] ..s1. 564.687836: my_direct_func: waking up kcompactd0-63 -0 [006] ..s1. 564.690926: my_direct_func: waking up rcu_preempt-17 -0 [006] ..s1. 564.696872: my_direct_func: waking up rcu_preempt-17 -0 [007] ..s1. 565.191982: my_direct_func: waking up kcompactd0-63 Setup a function filter to the 'wake_up_process' too, and enable it. # cd /sys/kernel/tracing/ # echo wake_up_process > set_ftrace_filter # echo function > current_tracer # less trace ... -0 [006] ..s3. 686.180972: wake_up_process <-call_timer_fn -0 [006] ..s3. 686.186919: wake_up_process <-call_timer_fn -0 [002] ..s3. 686.264049: wake_up_process <-call_timer_fn -0 [002] d.h6. 686.515216: wake_up_process <-kick_pool -0 [002] d.h6. 686.691386: wake_up_process <-kick_pool Then, only function tracer is shown on x86. But if you enable 'kprobe on ftrace' event (which uses SAVE_REGS flag) on the same function, it is shown again. # echo 'p wake_up_process' >> dynamic_events # echo 1 > events/kprobes/p_wake_up_process_0/enable # echo > trace # less trace ... -0 [006] ..s2. 2710.345919: p_wake_up_process_0: (wake_up_process+0x4/0x20) -0 [006] ..s3. 2710.345923: wake_up_process <-call_timer_fn -0 [006] ..s1. 2710.345928: my_direct_func: waking up rcu_preempt-17 -0 [006] ..s2. 2710.349931: p_wake_up_process_0: (wake_up_process+0x4/0x20) -0 [006] ..s3. 2710.349934: wake_up_process <-call_timer_fn -0 [006] ..s1. 2710.349937: my_direct_func: waking up rcu_preempt-17 To fix this issue, use SAVE_REGS flag for multiple ftrace_ops flag of direct_call by default. Link: https://lore.kernel.org/linux-trace-kernel/170484558617.178953.1590516949390270842.stgit@devnote2 Fixes: 60c8971899f3 ("ftrace: Make DIRECT_CALLS work WITH_ARGS and !WITH_REGS") Cc: stable@vger.kernel.org Cc: Florent Revest Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Mark Rutland Tested-by: Mark Rutland [arm64] Acked-by: Jiri Olsa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b01ae7d360218..c060d5b479102 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs); static int register_ftrace_function_nolock(struct ftrace_ops *ops); +/* + * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct + * call will be jumped from ftrace_regs_caller. Only if the architecture does + * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it + * jumps from ftrace_caller for multiple ftrace_ops. + */ +#ifndef HAVE_DYNAMIC_FTRACE_WITH_REGS #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) +#else +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) +#endif static int check_direct_multi(struct ftrace_ops *ops) { -- GitLab From 7d708c145b2631941b8b0b4a740dc2990818c39c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 9 Feb 2024 01:24:54 +0000 Subject: [PATCH 0869/1405] Revert "usb: dwc3: Support EBC feature of DWC_usb31" This reverts commit 398aa9a7e77cf23c2a6f882ddd3dcd96f21771dc. The update to the gadget API to support EBC feature is incomplete. It's missing at least the following: * New usage documentation * Gadget capability check * Condition for the user to check how many and which endpoints can be used as "fifo_mode" * Description of how it can affect completed request (e.g. dwc3 won't update TRB on completion -- ie. how it can affect request's actual length report) Let's revert this until it's ready. Fixes: 398aa9a7e77c ("usb: dwc3: Support EBC feature of DWC_usb31") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3042f847ff904b4dd4e4cf66a1b9df470e63439e.1707441690.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 - drivers/usb/dwc3/gadget.c | 6 ------ drivers/usb/dwc3/gadget.h | 2 -- include/linux/usb/gadget.h | 1 - 4 files changed, 10 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index e3eea965e57bf..e120611a5174f 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -376,7 +376,6 @@ /* Global HWPARAMS4 Register */ #define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13) #define DWC3_MAX_HIBER_SCRATCHBUFS 15 -#define DWC3_EXT_BUFF_CONTROL BIT(21) /* Global HWPARAMS6 Register */ #define DWC3_GHWPARAMS6_BCSUPPORT BIT(14) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 564976b3e2b91..4c8dd67246788 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -673,12 +673,6 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action) params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1); } - if (dep->endpoint.fifo_mode) { - if (!(dwc->hwparams.hwparams4 & DWC3_EXT_BUFF_CONTROL)) - return -EINVAL; - params.param1 |= DWC3_DEPCFG_EBC_HWO_NOWB | DWC3_DEPCFG_USE_EBC; - } - return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, ¶ms); } diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index fd7a4e94397e6..55a56cf67d736 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -26,8 +26,6 @@ struct dwc3; #define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) #define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) #define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13) -#define DWC3_DEPCFG_EBC_HWO_NOWB BIT(14) -#define DWC3_DEPCFG_USE_EBC BIT(15) #define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) #define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a771ccc038ac9..6532beb587b19 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -236,7 +236,6 @@ struct usb_ep { unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; - unsigned fifo_mode:1; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; -- GitLab From f4653ec9861cd96a1a6a3258c4a807898ee8cf3c Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:00 -0800 Subject: [PATCH 0870/1405] of: property: Improve finding the consumer of a remote-endpoint property We have a more accurate function to find the right consumer of a remote-endpoint property instead of searching for a parent with compatible string property. So, use that instead. While at it, make the code to find the consumer a bit more flexible and based on the property being parsed. Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-2-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 47 +++++++++---------------------------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index bbf0dee2fb9ca..773304be5ca6c 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1063,36 +1063,6 @@ of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, return of_device_get_match_data(dev); } -static struct device_node *of_get_compat_node(struct device_node *np) -{ - of_node_get(np); - - while (np) { - if (!of_device_is_available(np)) { - of_node_put(np); - np = NULL; - } - - if (of_property_present(np, "compatible")) - break; - - np = of_get_next_parent(np); - } - - return np; -} - -static struct device_node *of_get_compat_node_parent(struct device_node *np) -{ - struct device_node *parent, *node; - - parent = of_get_parent(np); - node = of_get_compat_node(parent); - of_node_put(parent); - - return node; -} - static void of_link_to_phandle(struct device_node *con_np, struct device_node *sup_np) { @@ -1222,10 +1192,10 @@ static struct device_node *parse_##fname(struct device_node *np, \ * parse_prop.prop_name: Name of property holding a phandle value * parse_prop.index: For properties holding a list of phandles, this is the * index into the list + * @get_con_dev: If the consumer node containing the property is never converted + * to a struct device, implement this ops so fw_devlink can use it + * to find the true consumer. * @optional: Describes whether a supplier is mandatory or not - * @node_not_dev: The consumer node containing the property is never converted - * to a struct device. Instead, parse ancestor nodes for the - * compatible property to find a node corresponding to a device. * * Returns: * parse_prop() return values are @@ -1236,8 +1206,8 @@ static struct device_node *parse_##fname(struct device_node *np, \ struct supplier_bindings { struct device_node *(*parse_prop)(struct device_node *np, const char *prop_name, int index); + struct device_node *(*get_con_dev)(struct device_node *np); bool optional; - bool node_not_dev; }; DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") @@ -1352,7 +1322,10 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, - { .parse_prop = parse_remote_endpoint, .node_not_dev = true, }, + { + .parse_prop = parse_remote_endpoint, + .get_con_dev = of_graph_get_port_parent, + }, { .parse_prop = parse_pwms, }, { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, @@ -1403,8 +1376,8 @@ static int of_link_property(struct device_node *con_np, const char *prop_name) while ((phandle = s->parse_prop(con_np, prop_name, i))) { struct device_node *con_dev_np; - con_dev_np = s->node_not_dev - ? of_get_compat_node_parent(con_np) + con_dev_np = s->get_con_dev + ? s->get_con_dev(con_np) : of_node_get(con_np); matched = true; i++; -- GitLab From 782bfd03c3ae2c0e6e01b661b8e18f1de50357be Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:01 -0800 Subject: [PATCH 0871/1405] of: property: Improve finding the supplier of a remote-endpoint property After commit 4a032827daa8 ("of: property: Simplify of_link_to_phandle()"), remote-endpoint properties created a fwnode link from the consumer device to the supplier endpoint. This is a tiny bit inefficient (not buggy) when trying to create device links or detecting cycles. So, improve this the same way we improved finding the consumer of a remote-endpoint property. Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-3-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 773304be5ca6c..a5f562a5e1462 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1232,7 +1232,6 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) -DEFINE_SIMPLE_PROP(remote_endpoint, "remote-endpoint", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) @@ -1298,6 +1297,17 @@ static struct device_node *parse_interrupts(struct device_node *np, return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } +static struct device_node *parse_remote_endpoint(struct device_node *np, + const char *prop_name, + int index) +{ + /* Return NULL for index > 0 to signify end of remote-endpoints. */ + if (!index || strcmp(prop_name, "remote-endpoint")) + return NULL; + + return of_graph_get_remote_port_parent(np); +} + static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, -- GitLab From 8f1e0d791b5281f3a38620bc7c57763dc551be15 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 6 Feb 2024 17:18:02 -0800 Subject: [PATCH 0872/1405] of: property: Add in-ports/out-ports support to of_graph_get_port_parent() Similar to the existing "ports" node name, coresight device tree bindings have added "in-ports" and "out-ports" as standard node names for a collection of ports. Add support for these name to of_graph_get_port_parent() so that remote-endpoint parsing can find the correct parent node for these coresight ports too. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20240207011803.2637531-4-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index a5f562a5e1462..b71267c6667cf 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -763,7 +763,9 @@ struct device_node *of_graph_get_port_parent(struct device_node *node) /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); - if (depth == 2 && !of_node_name_eq(node, "ports")) + if (depth == 2 && !of_node_name_eq(node, "ports") && + !of_node_name_eq(node, "in-ports") && + !of_node_name_eq(node, "out-ports")) break; } return node; -- GitLab From 44dc5c41b5b1267d4dd037d26afc0c4d3a568acb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 9 Feb 2024 06:36:22 -0500 Subject: [PATCH 0873/1405] tracing: Fix wasted memory in saved_cmdlines logic While looking at improving the saved_cmdlines cache I found a huge amount of wasted memory that should be used for the cmdlines. The tracing data saves pids during the trace. At sched switch, if a trace occurred, it will save the comm of the task that did the trace. This is saved in a "cache" that maps pids to comms and exposed to user space via the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by default 128 comms. The structure that uses this creates an array to store the pids using PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure to be of the size of 131104 bytes on 64 bit machines. In hex: 131104 = 0x20020, and since the kernel allocates generic memory in powers of two, the kernel would allocate 0x40000 or 262144 bytes to store this structure. That leaves 131040 bytes of wasted space. Worse, the structure points to an allocated array to store the comm names, which is 16 bytes times the amount of names to save (currently 128), which is 2048 bytes. Instead of allocating a separate array, make the structure end with a variable length string and use the extra space for that. This is similar to a recommendation that Linus had made about eventfs_inode names: https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundation.org/ Instead of allocating a separate string array to hold the saved comms, have the structure end with: char saved_cmdlines[]; and round up to the next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN It will use this extra space for the saved_cmdline portion. Now, instead of saving only 128 comms by default, by using this wasted space at the end of the structure it can save over 8000 comms and even saves space by removing the need for allocating the other array. Link: https://lore.kernel.org/linux-trace-kernel/20240209063622.1f7b6d5f@rorschach.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Cc: Sven Schnelle Cc: Mete Durlu Fixes: 939c7a4f04fcd ("tracing: Introduce saved_cmdlines_size file") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 75 ++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a7c6fd934e9c..9ff8a439d6746 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2320,7 +2320,7 @@ struct saved_cmdlines_buffer { unsigned *map_cmdline_to_pid; unsigned cmdline_num; int cmdline_idx; - char *saved_cmdlines; + char saved_cmdlines[]; }; static struct saved_cmdlines_buffer *savedcmd; @@ -2334,47 +2334,58 @@ static inline void set_cmdline(int idx, const char *cmdline) strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); } -static int allocate_cmdlines_buffer(unsigned int val, - struct saved_cmdlines_buffer *s) +static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) +{ + int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); + + kfree(s->map_cmdline_to_pid); + free_pages((unsigned long)s, order); +} + +static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) { + struct saved_cmdlines_buffer *s; + struct page *page; + int orig_size, size; + int order; + + /* Figure out how much is needed to hold the given number of cmdlines */ + orig_size = sizeof(*s) + val * TASK_COMM_LEN; + order = get_order(orig_size); + size = 1 << (order + PAGE_SHIFT); + page = alloc_pages(GFP_KERNEL, order); + if (!page) + return NULL; + + s = page_address(page); + memset(s, 0, sizeof(*s)); + + /* Round up to actual allocation */ + val = (size - sizeof(*s)) / TASK_COMM_LEN; + s->cmdline_num = val; + s->map_cmdline_to_pid = kmalloc_array(val, sizeof(*s->map_cmdline_to_pid), GFP_KERNEL); - if (!s->map_cmdline_to_pid) - return -ENOMEM; - - s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); - if (!s->saved_cmdlines) { - kfree(s->map_cmdline_to_pid); - return -ENOMEM; + if (!s->map_cmdline_to_pid) { + free_saved_cmdlines_buffer(s); + return NULL; } s->cmdline_idx = 0; - s->cmdline_num = val; memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(s->map_pid_to_cmdline)); memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, val * sizeof(*s->map_cmdline_to_pid)); - return 0; + return s; } static int trace_create_savedcmd(void) { - int ret; - - savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); - if (!savedcmd) - return -ENOMEM; - - ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); - if (ret < 0) { - kfree(savedcmd); - savedcmd = NULL; - return -ENOMEM; - } + savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT); - return 0; + return savedcmd ? 0 : -ENOMEM; } int is_tracing_stopped(void) @@ -6056,26 +6067,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } -static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) -{ - kfree(s->saved_cmdlines); - kfree(s->map_cmdline_to_pid); - kfree(s); -} - static int tracing_resize_saved_cmdlines(unsigned int val) { struct saved_cmdlines_buffer *s, *savedcmd_temp; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = allocate_cmdlines_buffer(val); if (!s) return -ENOMEM; - if (allocate_cmdlines_buffer(val, s) < 0) { - kfree(s); - return -ENOMEM; - } - preempt_disable(); arch_spin_lock(&trace_cmdline_lock); savedcmd_temp = savedcmd; -- GitLab From e5aa6d51a2ef8c7ef7e3fe76bebe530fb68e7f08 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 9 Feb 2024 09:20:44 +0100 Subject: [PATCH 0874/1405] ALSA: hda/cs35l56: select intended config FW_CS_DSP Commit 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") adds configs SND_HDA_SCODEC_CS35L56_{I2C,SPI}, which selects the non-existing config CS_DSP. Note the renaming in commit d7cfdf17cb9d ("firmware: cs_dsp: Rename KConfig symbol CS_DSP -> FW_CS_DSP"), though. Select the intended config FW_CS_DSP. This broken select command probably was not noticed as the configs also select SND_HDA_CS_DSP_CONTROLS and this then selects FW_CS_DSP. So, the select FW_CS_DSP could actually be dropped, but we will keep this redundancy in place as the author originally also intended to have this redundancy of selects in place. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Lukas Bulwahn Reviewed-by: Simon Trimmer Link: https://lore.kernel.org/r/20240209082044.3981-1-lukas.bulwahn@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 21a90b3c4cc73..8e0ff70fb6101 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -156,7 +156,7 @@ config SND_HDA_SCODEC_CS35L56_I2C depends on I2C depends on ACPI || COMPILE_TEST depends on SND_SOC - select CS_DSP + select FW_CS_DSP select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 @@ -171,7 +171,7 @@ config SND_HDA_SCODEC_CS35L56_SPI depends on SPI_MASTER depends on ACPI || COMPILE_TEST depends on SND_SOC - select CS_DSP + select FW_CS_DSP select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 -- GitLab From 7f8b81ca35f909747842b649025af541ec172b8f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2024 14:58:49 +0100 Subject: [PATCH 0875/1405] s390/configs: provide compat topic configuration target CONFIG_COMPAT is disabled by default, however compat code still needs to be compile tested. Add a compat topic configuration target which allows to enable it easily. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/configs/compat.config | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 arch/s390/configs/compat.config diff --git a/arch/s390/configs/compat.config b/arch/s390/configs/compat.config new file mode 100644 index 0000000000000..6fd051453ae82 --- /dev/null +++ b/arch/s390/configs/compat.config @@ -0,0 +1,3 @@ +# Help: Enable compat support +CONFIG_COMPAT=y +CONFIG_COMPAT_32BIT_TIME=y -- GitLab From 027790f611321acabbd3c938c1c8ebc08dddd71e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2024 14:58:50 +0100 Subject: [PATCH 0876/1405] s390/configs: enable INIT_STACK_ALL_ZERO in all configurations It looks like all distributions will enable INIT_STACK_ALL_ZERO. Reflect that in the default configurations. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 3 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index cae2dd34fbb49..0e8ba18a4a084 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -709,7 +709,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" -CONFIG_INIT_STACK_NONE=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 42b988873e544..c7a39b9e5f01d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -693,7 +693,6 @@ CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" -CONFIG_INIT_STACK_NONE=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 30d2a16876650..217a92e025c0e 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -64,7 +64,6 @@ CONFIG_ZFCP=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set CONFIG_LSM="yama,loadpin,safesetid,integrity" -CONFIG_INIT_STACK_NONE=y # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y -- GitLab From 124468af7e769a52d27c3290007ac6e2ba346ccd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2024 14:58:51 +0100 Subject: [PATCH 0877/1405] s390/configs: update default configurations Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 7 +------ arch/s390/configs/defconfig | 8 +------- arch/s390/configs/zfcpdump_defconfig | 1 + 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0e8ba18a4a084..c924be0d7ed87 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -118,7 +118,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -374,6 +373,7 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -436,9 +436,6 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -637,7 +634,6 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m -CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y CONFIG_FSCACHE=y CONFIG_CACHEFILES=m @@ -738,7 +734,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c7a39b9e5f01d..c8f0c9fe40d70 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -109,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y @@ -364,6 +363,7 @@ CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m @@ -426,9 +426,6 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -622,7 +619,6 @@ CONFIG_FUSE_FS=y CONFIG_CUSE=m CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=m -CONFIG_NETFS_SUPPORT=m CONFIG_NETFS_STATS=y CONFIG_FSCACHE=y CONFIG_CACHEFILES=m @@ -723,11 +719,9 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_ADIANTUM=m CONFIG_CRYPTO_ARC4=m -CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_HCTR2=m CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_CHACHA20POLY1305=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 217a92e025c0e..c51f3ec4eb28a 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -8,6 +8,7 @@ CONFIG_BPF_SYSCALL=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_MARCH_Z13=y CONFIG_NR_CPUS=2 -- GitLab From 727b943263dc98a7aca355cc0302158218f71543 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 9 Feb 2024 14:57:00 +0000 Subject: [PATCH 0878/1405] ASoC: cs35l56: Remove default from IRQ1_CFG register The driver never uses the IRQ1_CFG register so there's no need to provide a default value. It's set as a readable register only for debugging through the regmap registers file. A system-specific firmware could overwrite this register with a non-default value. Therefore the driver can't hardcode what the initial value actually is. As the register is only for debugging the value can be left unknown until someone wants to read it through debugfs. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20240209145700.1555950-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 02fba4bc0a14f..995d979b6d87e 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -51,7 +51,6 @@ static const struct reg_default cs35l56_reg_defaults[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, - { CS35L56_IRQ1_CFG, 0x00000000 }, { CS35L56_IRQ1_MASK_1, 0x83ffffff }, { CS35L56_IRQ1_MASK_2, 0xffff7fff }, { CS35L56_IRQ1_MASK_4, 0xe0ffffff }, -- GitLab From 9f208e097801f9c2088eb339a1162fff81c08b4e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 9 Feb 2024 15:59:07 +0100 Subject: [PATCH 0879/1405] spi: spi-ppc4xx: include missing platform_device.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the driver currently fails to compile on 6.8-rc3 due to: | spi-ppc4xx.c: In function ‘spi_ppc4xx_of_probe’: | @346:36: error: invalid use of undefined type ‘struct platform_device’ | 346 | struct device_node *np = op->dev.of_node; | | ^~ | ... (more similar errors) it was working with 6.7. Looks like it only needed the include and its compiling fine! Signed-off-by: Christian Lamparter Link: https://lore.kernel.org/r/3eb3f9c4407ba99d1cd275662081e46b9e839173.1707490664.git.chunkeey@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 03aab661be9d3..412d6e6782246 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- GitLab From f6a1892585cd19e63c4ef2334e26cd536d5b678d Mon Sep 17 00:00:00 2001 From: Aleksander Mazur Date: Tue, 23 Jan 2024 14:43:00 +0100 Subject: [PATCH 0880/1405] x86/Kconfig: Transmeta Crusoe is CPU family 5, not 6 The kernel built with MCRUSOE is unbootable on Transmeta Crusoe. It shows the following error message: This kernel requires an i686 CPU, but only detected an i586 CPU. Unable to boot - please use a kernel appropriate for your CPU. Remove MCRUSOE from the condition introduced in commit in Fixes, effectively changing X86_MINIMUM_CPU_FAMILY back to 5 on that machine, which matches the CPU family given by CPUID. [ bp: Massage commit message. ] Fixes: 25d76ac88821 ("x86/Kconfig: Explicitly enumerate i686-class CPUs in Kconfig") Signed-off-by: Aleksander Mazur Signed-off-by: Borislav Petkov (AMD) Acked-by: H. Peter Anvin Cc: Link: https://lore.kernel.org/r/20240123134309.1117782-1-deweloper@wp.pl --- arch/x86/Kconfig.cpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b9224cf2ee4d6..2a7279d80460a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -379,7 +379,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8) default "5" if X86_32 && X86_CMPXCHG64 default "4" -- GitLab From 61da7c8e2a602f66be578cbbcebe8638c10e0f48 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 30 Jan 2024 15:43:53 +0000 Subject: [PATCH 0881/1405] arm64/signal: Don't assume that TIF_SVE means we saved SVE state When we are in a syscall we will only save the FPSIMD subset even though the task still has access to the full register set, and on context switch we will only remove TIF_SVE when loading the register state. This means that the signal handling code should not assume that TIF_SVE means that the register state is stored in SVE format, it should instead check the format that was recorded during save. Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Signed-off-by: Mark Brown Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240130-arm64-sve-signal-regs-v2-1-9fc6f9502782@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/signal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a5dc6f7641958..25ceaee6b025d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1635,7 +1635,7 @@ void fpsimd_preserve_current_state(void) void fpsimd_signal_preserve_current_state(void) { fpsimd_preserve_current_state(); - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(current); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0e8beb3349ea2..425b1bc17a3f6 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); flags |= SVE_SIG_FLAG_SM; - } else if (test_thread_flag(TIF_SVE)) { + } else if (current->thread.fp_type == FP_STATE_SVE) { vq = sve_vq_from_vl(vl); } @@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE) || + if (add_all || current->thread.fp_type == FP_STATE_SVE || thread_sm_enabled(¤t->thread)) { int vl = max(sve_max_vl(), sme_max_vl()); -- GitLab From 719da04f2d1285922abca72b074fb6fa75d464ea Mon Sep 17 00:00:00 2001 From: Hojin Nam Date: Thu, 8 Feb 2024 10:34:15 +0900 Subject: [PATCH 0882/1405] perf: CXL: fix mismatched cpmu event opcode S2M NDR BI-ConflictAck opcode is described as 4 in the CXL r3.0 3.3.9 Table 3.43. However, it is defined as 3 in macro definition. Fixes: 5d7107c72796 ("perf: CXL Performance Monitoring Unit driver") Signed-off-by: Hojin Nam Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240208013415epcms2p2904187c8a863f4d0d2adc980fb91a2dc@epcms2p2 Signed-off-by: Will Deacon --- drivers/perf/cxl_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index 365d964b0f6a6..bc0d414a6aff9 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -419,7 +419,7 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), - CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)), /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)), -- GitLab From c0b26c06ed008c0898829dda4c2783b8ec478350 Mon Sep 17 00:00:00 2001 From: Seongsu Park Date: Fri, 2 Feb 2024 10:33:06 +0900 Subject: [PATCH 0883/1405] arm64: fix typo in comments fix typo in comments thath -> that Signed-off-by: Seongsu Park Link: https://lore.kernel.org/r/20240202013306.883777-1-sgsu.park@samsung.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 21c824edf8ce4..bd8d4ca81a48c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -83,7 +83,7 @@ struct arm64_ftr_bits { * to full-0 denotes that this field has no override * * A @mask field set to full-0 with the corresponding @val field set - * to full-1 denotes thath this field has an invalid override. + * to full-1 denotes that this field has an invalid override. */ struct arm64_ftr_override { u64 val; -- GitLab From f9daab0ad01cf9d165dbbbf106ca4e61d06e7fe8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 5 Feb 2024 23:45:52 -0800 Subject: [PATCH 0884/1405] arm64: jump_label: use constraints "Si" instead of "i" The generic constraint "i" seems to be copied from x86 or arm (and with a redundant generic operand modifier "c"). It works with -fno-PIE but not with -fPIE/-fPIC in GCC's aarch64 port. The machine constraint "S", which denotes a symbol or label reference with a constant offset, supports PIC and has been available in GCC since 2012 and in Clang since 7.0. However, Clang before 19 does not support "S" on a symbol with a constant offset [1] (e.g. `static_key_false(&nf_hooks_needed[pf][hook])` in include/linux/netfilter.h), so we use "i" as a fallback. Suggested-by: Ard Biesheuvel Signed-off-by: Fangrui Song Link: https://github.com/llvm/llvm-project/pull/80255 [1] Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240206074552.541154-1-maskray@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/jump_label.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 48ddc0f45d228..b7716b215f91a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,6 +15,10 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +/* + * Prefer the constraint "S" to support PIC with GCC. Clang before 19 does not + * support "S" on a symbol with a constant offset, so we use "i" as a fallback. + */ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { @@ -23,9 +27,9 @@ static __always_inline bool arch_static_branch(struct static_key * const key, " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" + " .quad (%[key] - .) + %[bit0] \n\t" " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); return false; l_yes: @@ -40,9 +44,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key * const ke " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" + " .quad (%[key] - .) + %[bit0] \n\t" " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); return false; l_yes: -- GitLab From 50572064ec7109b00eef8880e905f55861c8b3de Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Fri, 9 Feb 2024 17:11:09 +0000 Subject: [PATCH 0885/1405] perf/arm-cmn: Workaround AmpereOneX errata AC04_MESH_1 (incorrect child count) AmpereOneX mesh implementation has a bug in HN-P nodes that makes them report incorrect child count. The failing crosspoints report 8 children while they only have two. When the driver tries to access the inexistent child nodes, it believes it has reached an invalid node type and probing fails. The workaround is to ignore those incorrect child nodes and continue normally. Signed-off-by: Ilkka Koskinen [ rm: rewrote simpler generalised version ] Tested-by: Ilkka Koskinen Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ce4b1442135fe03d0de41859b04b268c88c854a3.1707498577.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index c584165b13bab..7e3aa7e2345fa 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2305,6 +2305,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); continue; } + /* + * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus + * child count larger than the number of valid child pointers. + * A child offset of 0 can only occur on CMN-600; otherwise it + * would imply the root node being its own grandchild, which + * we can safely dismiss in general. + */ + if (reg == 0 && cmn->part != PART_CMN600) { + dev_dbg(cmn->dev, "bogus child pointer?\n"); + continue; + } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); -- GitLab From 41044d5360685e78a869d40a168491a70cdb7e73 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 23 Jan 2024 11:55:31 -0700 Subject: [PATCH 0886/1405] PCI: Fix active state requirement in PME polling The commit noted in fixes added a bogus requirement that runtime PM managed devices need to be in the RPM_ACTIVE state for PME polling. In fact, only devices in low power states should be polled. However there's still a requirement that the device config space must be accessible, which has implications for both the current state of the polled device and the parent bridge, when present. It's not sufficient to assume the bridge remains in D0 and cases have been observed where the bridge passes the D0 test, but the PM state indicates RPM_SUSPENDING and config space of the polled device becomes inaccessible during pci_pme_wakeup(). Therefore, since the bridge is already effectively required to be in the RPM_ACTIVE state, formalize this in the code and elevate the PM usage count to maintain the state while polling the subordinate device. This resolves a regression reported in the bugzilla below where a Thunderbolt/USB4 hierarchy fails to scan for an attached NVMe endpoint downstream of a bridge in a D3hot power state. Link: https://lore.kernel.org/r/20240123185548.1040096-1-alex.williamson@redhat.com Fixes: d3fcd7360338 ("PCI: Fix runtime PM race with PME polling") Reported-by: Sanath S Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218360 Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Tested-by: Sanath S Reviewed-by: Rafael J. Wysocki Cc: Lukas Wunner Cc: Mika Westerberg --- drivers/pci/pci.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d8f11a078924c..0a80156fe812f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2496,29 +2496,36 @@ static void pci_pme_list_scan(struct work_struct *work) if (pdev->pme_poll) { struct pci_dev *bridge = pdev->bus->self; struct device *dev = &pdev->dev; - int pm_status; + struct device *bdev = bridge ? &bridge->dev : NULL; + int bref = 0; /* - * If bridge is in low power state, the - * configuration space of subordinate devices - * may be not accessible + * If we have a bridge, it should be in an active/D0 + * state or the configuration space of subordinate + * devices may not be accessible or stable over the + * course of the call. */ - if (bridge && bridge->current_state != PCI_D0) - continue; + if (bdev) { + bref = pm_runtime_get_if_active(bdev, true); + if (!bref) + continue; + + if (bridge->current_state != PCI_D0) + goto put_bridge; + } /* - * If the device is in a low power state it - * should not be polled either. + * The device itself should be suspended but config + * space must be accessible, therefore it cannot be in + * D3cold. */ - pm_status = pm_runtime_get_if_active(dev, true); - if (!pm_status) - continue; - - if (pdev->current_state != PCI_D3cold) + if (pm_runtime_suspended(dev) && + pdev->current_state != PCI_D3cold) pci_pme_wakeup(pdev, NULL); - if (pm_status > 0) - pm_runtime_put(dev); +put_bridge: + if (bref > 0) + pm_runtime_put(bdev); } else { list_del(&pme_dev->list); kfree(pme_dev); -- GitLab From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 31 Jan 2024 14:27:25 -0500 Subject: [PATCH 0887/1405] btrfs: don't drop extent_map for free space inode on write error While running the CI for an unrelated change I hit the following panic with generic/648 on btrfs_holes_spacecache. assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385 ------------[ cut here ]------------ kernel BUG at fs/btrfs/extent_io.c:1385! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1 RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0 Call Trace: extent_write_cache_pages+0x2ac/0x8f0 extent_writepages+0x87/0x110 do_writepages+0xd5/0x1f0 filemap_fdatawrite_wbc+0x63/0x90 __filemap_fdatawrite_range+0x5c/0x80 btrfs_fdatawrite_range+0x1f/0x50 btrfs_write_out_cache+0x507/0x560 btrfs_write_dirty_block_groups+0x32a/0x420 commit_cowonly_roots+0x21b/0x290 btrfs_commit_transaction+0x813/0x1360 btrfs_sync_file+0x51a/0x640 __x64_sys_fdatasync+0x52/0x90 do_syscall_64+0x9c/0x190 entry_SYSCALL_64_after_hwframe+0x6e/0x76 This happens because we fail to write out the free space cache in one instance, come back around and attempt to write it again. However on the second pass through we go to call btrfs_get_extent() on the inode to get the extent mapping. Because this is a new block group, and with the free space inode we always search the commit root to avoid deadlocking with the tree, we find nothing and return a EXTENT_MAP_HOLE for the requested range. This happens because the first time we try to write the space cache out we hit an error, and on an error we drop the extent mapping. This is normal for normal files, but the free space cache inode is special. We always expect the extent map to be correct. Thus the second time through we end up with a bogus extent map. Since we're deprecating this feature, the most straightforward way to fix this is to simply skip dropping the extent map range for this failed range. I shortened the test by using error injection to stress the area to make it easier to reproduce. With this patch in place we no longer panic with my error injection test. CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/inode.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7bcc1c03437a8..d232eca1bbee2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) unwritten_start += logical_len; clear_extent_uptodate(io_tree, unwritten_start, end, NULL); - /* Drop extent maps for the part of the extent we didn't write. */ - btrfs_drop_extent_map_range(inode, unwritten_start, end, false); + /* + * Drop extent maps for the part of the extent we didn't write. + * + * We have an exception here for the free_space_inode, this is + * because when we do btrfs_get_extent() on the free space inode + * we will search the commit root. If this is a new block group + * we won't find anything, and we will trip over the assert in + * writepage where we do ASSERT(em->block_start != + * EXTENT_MAP_HOLE). + * + * Theoretically we could also skip this for any NOCOW extent as + * we don't mess with the extent map tree in the NOCOW case, but + * for now simply skip this if we are the free space inode. + */ + if (!btrfs_is_free_space_inode(inode)) + btrfs_drop_extent_map_range(inode, unwritten_start, + end, false); /* * If the ordered extent had an IOERR or something else went -- GitLab From 1693d5442c458ae8d5b0d58463b873cd879569ed Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:06 +0000 Subject: [PATCH 0888/1405] btrfs: add and use helper to check if block group is used Add a helper function to determine if a block group is being used and make use of it at btrfs_delete_unused_bgs(). This helper will also be used in future code changes. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 3 +-- fs/btrfs/block-group.h | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index a9be9ac992222..9daef18bcbbc0 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1512,8 +1512,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_lock(&block_group->lock); - if (block_group->reserved || block_group->pinned || - block_group->used || block_group->ro || + if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { /* * We want to bail if we made new allocations or have diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index c4a1f01cc1c24..962b11983901a 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -257,6 +257,13 @@ static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group) return (block_group->start + block_group->length); } +static inline bool btrfs_is_block_group_used(const struct btrfs_block_group *bg) +{ + lockdep_assert_held(&bg->lock); + + return (bg->used > 0 || bg->reserved > 0 || bg->pinned > 0); +} + static inline bool btrfs_is_block_group_data_only( struct btrfs_block_group *block_group) { -- GitLab From f4a9f219411f318ae60d6ff7f129082a75686c6c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:14 +0000 Subject: [PATCH 0889/1405] btrfs: do not delete unused block group if it may be used soon Before deleting a block group that is in the list of unused block groups (fs_info->unused_bgs), we check if the block group became used before deleting it, as extents from it may have been allocated after it was added to the list. However even if the block group was not yet used, there may be tasks that have only reserved space and have not yet allocated extents, and they might be relying on the availability of the unused block group in order to allocate extents. The reservation works first by increasing the "bytes_may_use" field of the corresponding space_info object (which may first require flushing delayed items, allocating a new block group, etc), and only later a task does the actual allocation of extents. For metadata we usually don't end up using all reserved space, as we are pessimistic and typically account for the worst cases (need to COW every single node in a path of a tree at maximum possible height, etc). For data we usually reserve the exact amount of space we're going to allocate later, except when using compression where we always reserve space based on the uncompressed size, as compression is only triggered when writeback starts so we don't know in advance how much space we'll actually need, or if the data is compressible. So don't delete an unused block group if the total size of its space_info object minus the block group's size is less then the sum of used space and space that may be used (space_info->bytes_may_use), as that means we have tasks that reserved space and may need to allocate extents from the block group. In this case, besides skipping the deletion, re-add the block group to the list of unused block groups so that it may be reconsidered later, in case the tasks that reserved space end up not needing to allocate extents from it. Allowing the deletion of the block group while we have reserved space, can result in tasks failing to allocate metadata extents (-ENOSPC) while under a transaction handle, resulting in a transaction abort, or failure during writeback for the case of data extents. CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9daef18bcbbc0..5fe37bc82f117 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1455,6 +1455,7 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, */ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) { + LIST_HEAD(retry_list); struct btrfs_block_group *block_group; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; @@ -1476,6 +1477,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&fs_info->unused_bgs_lock); while (!list_empty(&fs_info->unused_bgs)) { + u64 used; int trimming; block_group = list_first_entry(&fs_info->unused_bgs, @@ -1511,6 +1513,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) goto next; } + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (btrfs_is_block_group_used(block_group) || block_group->ro || list_is_singular(&block_group->list)) { @@ -1522,10 +1525,49 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) */ trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); + up_write(&space_info->groups_sem); + goto next; + } + + /* + * The block group may be unused but there may be space reserved + * accounting with the existence of that block group, that is, + * space_info->bytes_may_use was incremented by a task but no + * space was yet allocated from the block group by the task. + * That space may or may not be allocated, as we are generally + * pessimistic about space reservation for metadata as well as + * for data when using compression (as we reserve space based on + * the worst case, when data can't be compressed, and before + * actually attempting compression, before starting writeback). + * + * So check if the total space of the space_info minus the size + * of this block group is less than the used space of the + * space_info - if that's the case, then it means we have tasks + * that might be relying on the block group in order to allocate + * extents, and add back the block group to the unused list when + * we finish, so that we retry later in case no tasks ended up + * needing to allocate extents from the block group. + */ + used = btrfs_space_info_used(space_info, true); + if (space_info->total_bytes - block_group->length < used) { + /* + * Add a reference for the list, compensate for the ref + * drop under the "next" label for the + * fs_info->unused_bgs list. + */ + btrfs_get_block_group(block_group); + list_add_tail(&block_group->bg_list, &retry_list); + + trace_btrfs_skip_unused_block_group(block_group); + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); up_write(&space_info->groups_sem); goto next; } + spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); /* We don't want to force the issue, only flip if it's ok. */ ret = inc_block_group_ro(block_group, 0); @@ -1649,12 +1691,16 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } + list_splice_tail(&retry_list, &fs_info->unused_bgs); spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); return; flip_async: btrfs_end_transaction(trans); + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->unused_bgs); + spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); btrfs_put_block_group(block_group); btrfs_discard_punt_unused_bgs_list(fs_info); -- GitLab From 12c5128f101bfa47a08e4c0e1a75cfa2d0872bcd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jan 2024 09:53:19 +0000 Subject: [PATCH 0890/1405] btrfs: add new unused block groups to the list of unused block groups Space reservations for metadata are, most of the time, pessimistic as we reserve space for worst possible cases - where tree heights are at the maximum possible height (8), we need to COW every extent buffer in a tree path, need to split extent buffers, etc. For data, we generally reserve the exact amount of space we are going to allocate. The exception here is when using compression, in which case we reserve space matching the uncompressed size, as the compression only happens at writeback time and in the worst possible case we need that amount of space in case the data is not compressible. This means that when there's not available space in the corresponding space_info object, we may need to allocate a new block group, and then that block group might not be used after all. In this case the block group is never added to the list of unused block groups and ends up never being deleted - except if we unmount and mount again the fs, as when reading block groups from disk we add unused ones to the list of unused block groups (fs_info->unused_bgs). Otherwise a block group is only added to the list of unused block groups when we deallocate the last extent from it, so if no extent is ever allocated, the block group is kept around forever. This also means that if we have a bunch of tasks reserving space in parallel we can end up allocating many block groups that end up never being used or kept around for too long without being used, which has the potential to result in ENOSPC failures in case for example we over allocate too many metadata block groups and then end up in a state without enough unallocated space to allocate a new data block group. This is more likely to happen with metadata reservations as of kernel 6.7, namely since commit 28270e25c69a ("btrfs: always reserve space for delayed refs when starting transaction"), because we started to always reserve space for delayed references when starting a transaction handle for a non-zero number of items, and also to try to reserve space to fill the gap between the delayed block reserve's reserved space and its size. So to avoid this, when finishing the creation a new block group, add the block group to the list of unused block groups if it's still unused at that time. This way the next time the cleaner kthread runs, it will delete the block group if it's still unused and not needed to satisfy existing space reservations. Reported-by: Ivan Shapovalov Link: https://lore.kernel.org/linux-btrfs/9cdbf0ca9cdda1b4c84e15e548af7d7f9f926382.camel@intelfx.name/ CC: stable@vger.kernel.org # 6.7+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5fe37bc82f117..378d9103a2072 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2729,6 +2729,37 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info); list_del_init(&block_group->bg_list); clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); + + /* + * If the block group is still unused, add it to the list of + * unused block groups. The block group may have been created in + * order to satisfy a space reservation, in which case the + * extent allocation only happens later. But often we don't + * actually need to allocate space that we previously reserved, + * so the block group may become unused for a long time. For + * example for metadata we generally reserve space for a worst + * possible scenario, but then don't end up allocating all that + * space or none at all (due to no need to COW, extent buffers + * were already COWed in the current transaction and still + * unwritten, tree heights lower than the maximum possible + * height, etc). For data we generally reserve the axact amount + * of space we are going to allocate later, the exception is + * when using compression, as we must reserve space based on the + * uncompressed data size, because the compression is only done + * when writeback triggered and we don't know how much space we + * are actually going to need, so we reserve the uncompressed + * size because the data may be uncompressible in the worst case. + */ + if (ret == 0) { + bool used; + + spin_lock(&block_group->lock); + used = btrfs_is_block_group_used(block_group); + spin_unlock(&block_group->lock); + + if (!used) + btrfs_mark_bg_unused(block_group); + } } btrfs_trans_release_chunk_metadata(trans); } -- GitLab From 38ee0cb2a2e2ade077442085638eb181b0562971 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 14:31:10 +0200 Subject: [PATCH 0891/1405] selftests: net: Fix bridge backup port test flakiness The test toggles the carrier of a bridge port in order to test the bridge backup port feature. Due to the linkwatch delayed work the carrier change is not always reflected fast enough to the bridge driver and packets are not forwarded as the test expects, resulting in failures [1]. Fix by busy waiting on the bridge port state until it changes to the desired state following the carrier change. [1] # Backup port # ----------- [...] # TEST: swp1 carrier off [ OK ] # TEST: No forwarding out of swp1 [FAIL] [ 641.995910] br0: port 1(swp1) entered disabled state # TEST: No forwarding out of vx0 [ OK ] Fixes: b408453053fb ("selftests: net: Add bridge backup port and backup nexthop ID test") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Paolo Abeni Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240208123110.1063930-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/test_bridge_backup_port.sh | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 70a7d87ba2d21..1b3f89e2b86e6 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -124,6 +124,16 @@ tc_check_packets() [[ $pkts == $count ]] } +bridge_link_check() +{ + local ns=$1; shift + local dev=$1; shift + local state=$1; shift + + bridge -n $ns -d -j link show dev $dev | \ + jq -e ".[][\"state\"] == \"$state\"" &> /dev/null +} + ################################################################################ # Setup @@ -259,6 +269,7 @@ backup_port() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -268,6 +279,7 @@ backup_port() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding log_test $? 0 "swp1 carrier on" # Configure vx0 as the backup port of swp1 and check that packets are @@ -284,6 +296,7 @@ backup_port() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -293,6 +306,7 @@ backup_port() log_test $? 0 "Forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding log_test $? 0 "swp1 carrier on" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -314,6 +328,7 @@ backup_port() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -369,6 +384,7 @@ backup_nhid() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -382,6 +398,7 @@ backup_nhid() log_test $? 0 "Forwarding using VXLAN FDB entry" run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding log_test $? 0 "swp1 carrier on" # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check @@ -398,6 +415,7 @@ backup_nhid() log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -411,6 +429,7 @@ backup_nhid() log_test $? 0 "No forwarding using VXLAN FDB entry" run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding log_test $? 0 "swp1 carrier on" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -441,6 +460,7 @@ backup_nhid() log_test $? 0 "No forwarding using VXLAN FDB entry" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -497,6 +517,7 @@ backup_nhid_invalid() log_test $? 0 "Valid nexthop as backup nexthop" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" @@ -604,7 +625,9 @@ backup_nhid_ping() run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10" run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled run_cmd "ip -n $sw2 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" log_test $? 0 "Ping with backup nexthop ID" -- GitLab From 93590849a05edffaefa11695fab98f621259ded2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 17:55:26 +0200 Subject: [PATCH 0892/1405] selftests: forwarding: Fix layer 2 miss test flakiness After enabling a multicast querier on the bridge (like the test is doing), the bridge will wait for the Max Response Delay before starting to forward according to its MDB in order to let Membership Reports enough time to be received and processed. Currently, the test is waiting for exactly the default Max Response Delay (10 seconds) which is racy and leads to failures [1]. Fix by reducing the Max Response Delay to 1 second. [1] [...] # TEST: L2 miss - Multicast (IPv4) [FAIL] # Unregistered multicast filter was hit after adding MDB entry Fixes: 8c33266ae26a ("selftests: forwarding: Add layer 2 miss test cases") Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240208155529.1199729-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/tc_flower_l2_miss.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh index 20a7cb7222b8b..c2420bb72c128 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh @@ -209,14 +209,17 @@ test_l2_miss_multicast() # both registered and unregistered multicast traffic. bridge link set dev $swp2 mcast_router 2 + # Set the Max Response Delay to 100 centiseconds (1 second) so that the + # bridge will start forwarding according to its MDB soon after a + # multicast querier is enabled. + ip link set dev br1 type bridge mcast_query_response_interval 100 + # Forwarding according to MDB entries only takes place when the bridge # detects that there is a valid querier in the network. Set the bridge # as the querier and assign it a valid IPv6 link-local address to be # used as the source address for MLD queries. ip link set dev br1 type bridge mcast_querier 1 ip -6 address add fe80::1/64 nodad dev br1 - # Wait the default Query Response Interval (10 seconds) for the bridge - # to determine that there are no other queriers in the network. sleep 10 test_l2_miss_multicast_ipv4 @@ -224,6 +227,7 @@ test_l2_miss_multicast() ip -6 address del fe80::1/64 dev br1 ip link set dev br1 type bridge mcast_querier 0 + ip link set dev br1 type bridge mcast_query_response_interval 1000 bridge link set dev $swp2 mcast_router 1 } -- GitLab From 7399e2ce4d424f426417496eb289458780eea985 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 17:55:27 +0200 Subject: [PATCH 0893/1405] selftests: forwarding: Fix bridge MDB test flakiness After enabling a multicast querier on the bridge (like the test is doing), the bridge will wait for the Max Response Delay before starting to forward according to its MDB in order to let Membership Reports enough time to be received and processed. Currently, the test is waiting for exactly the default Max Response Delay (10 seconds) which is racy and leads to failures [1]. Fix by reducing the Max Response Delay to 1 second. [1] [...] # TEST: IPv4 host entries forwarding tests [FAIL] # Packet locally received after flood Fixes: b6d00da08610 ("selftests: forwarding: Add bridge MDB test") Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240208155529.1199729-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_mdb.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index 61348f71728cd..f94bfb6f45ecb 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -1252,14 +1252,17 @@ fwd_test() echo log_info "# Forwarding tests" + # Set the Max Response Delay to 100 centiseconds (1 second) so that the + # bridge will start forwarding according to its MDB soon after a + # multicast querier is enabled. + ip link set dev br0 type bridge mcast_query_response_interval 100 + # Forwarding according to MDB entries only takes place when the bridge # detects that there is a valid querier in the network. Set the bridge # as the querier and assign it a valid IPv6 link-local address to be # used as the source address for MLD queries. ip -6 address add fe80::1/64 nodad dev br0 ip link set dev br0 type bridge mcast_querier 1 - # Wait the default Query Response Interval (10 seconds) for the bridge - # to determine that there are no other queriers in the network. sleep 10 fwd_test_host @@ -1267,6 +1270,7 @@ fwd_test() ip link set dev br0 type bridge mcast_querier 0 ip -6 address del fe80::1/64 dev br0 + ip link set dev br0 type bridge mcast_query_response_interval 1000 } ctrl_igmpv3_is_in_test() -- GitLab From dd6b34589441f2ad4698dd88a664811550148b41 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 17:55:28 +0200 Subject: [PATCH 0894/1405] selftests: forwarding: Suppress grep warnings Suppress the following grep warnings: [...] INFO: # Port group entries configuration tests - (*, G) TEST: Common port group entries configuration tests (IPv4 (*, G)) [ OK ] TEST: Common port group entries configuration tests (IPv6 (*, G)) [ OK ] grep: warning: stray \ before / grep: warning: stray \ before / grep: warning: stray \ before / TEST: IPv4 (*, G) port group entries configuration tests [ OK ] grep: warning: stray \ before / grep: warning: stray \ before / grep: warning: stray \ before / TEST: IPv6 (*, G) port group entries configuration tests [ OK ] [...] They do not fail the test, but do clutter the output. Fixes: b6d00da08610 ("selftests: forwarding: Add bridge MDB test") Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240208155529.1199729-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_mdb.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index f94bfb6f45ecb..d9d587454d207 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -329,7 +329,7 @@ __cfg_test_port_ip_star_g() bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_err $? "(*, G) \"permanent\" entry has a pending group timer" - bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" check_err $? "\"permanent\" source entry has a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -346,7 +346,7 @@ __cfg_test_port_ip_star_g() bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" - bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" check_err $? "\"blocked\" source entry has a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -363,7 +363,7 @@ __cfg_test_port_ip_star_g() bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_err $? "(*, G) INCLUDE entry has a pending group timer" - bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" check_fail $? "Source entry does not have a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 -- GitLab From f97f1fcc96908c97a240ff6cb4474e155abfa0d7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 8 Feb 2024 17:55:29 +0200 Subject: [PATCH 0895/1405] selftests: forwarding: Fix bridge locked port test flakiness The redirection test case fails in the netdev CI on debug kernels because an FDB entry is learned despite the presence of a tc filter that redirects incoming traffic [1]. I am unable to reproduce the failure locally, but I can see how it can happen given that learning is first enabled and only then the ingress tc filter is configured. On debug kernels the time window between these two operations is longer compared to regular kernels, allowing random packets to be transmitted and trigger learning. Fix by reversing the order and configure the ingress tc filter before enabling learning. [1] [...] # TEST: Locked port MAB redirect [FAIL] # Locked entry created for redirected traffic Fixes: 38c43a1ce758 ("selftests: forwarding: Add test case for traffic redirection from a locked port") Signed-off-by: Ido Schimmel Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20240208155529.1199729-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/bridge_locked_port.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh index 9af9f6964808b..c62331b2e0060 100755 --- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -327,10 +327,10 @@ locked_port_mab_redirect() RET=0 check_port_mab_support || return 0 - bridge link set dev $swp1 learning on locked on mab on tc qdisc add dev $swp1 clsact tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \ action mirred egress redirect dev $swp2 + bridge link set dev $swp1 learning on locked on mab on ping_do $h1 192.0.2.2 check_err $? "Ping did not work with redirection" @@ -349,8 +349,8 @@ locked_port_mab_redirect() check_err $? "Locked entry not created after deleting filter" bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master - tc qdisc del dev $swp1 clsact bridge link set dev $swp1 learning off locked off mab off + tc qdisc del dev $swp1 clsact log_test "Locked port MAB redirect" } -- GitLab From c6e02eefd6ace3da3369c764f15429f5647056af Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 6 Feb 2024 15:00:46 +0000 Subject: [PATCH 0896/1405] cifs: change tcon status when need_reconnect is set on it When a tcon is marked for need_reconnect, the intention is to have it reconnected. This change adjusts tcon->status in cifs_tree_connect when need_reconnect is set. Also, this change has a minor correction in resetting need_reconnect on success. It makes sure that it is done with tc_lock held. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 5 +++++ fs/smb/client/dfs.c | 7 ++++++- fs/smb/client/file.c | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index bfd568f897105..01c49b29c4996 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4228,6 +4228,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index a8a1d386da656..449c59830039b 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -565,6 +565,11 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; @@ -625,8 +630,8 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru spin_lock(&tcon->tc_lock); if (tcon->status == TID_IN_TCON) tcon->status = TID_GOOD; - spin_unlock(&tcon->tc_lock); tcon->need_reconnect = false; + spin_unlock(&tcon->tc_lock); } return rc; diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b75282c204dad..f391c9b803d84 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -175,6 +175,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->need_reconnect) + tcon->status = TID_NEED_RECON; + if (tcon->status != TID_NEED_RECON) { spin_unlock(&tcon->tc_lock); return; -- GitLab From a39c757bf0596b17482a507f31c3ef0af0d1d2b4 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 6 Feb 2024 15:00:47 +0000 Subject: [PATCH 0897/1405] cifs: handle cases where multiple sessions share connection Based on our implementation of multichannel, it is entirely possible that a server struct may not be found in any channel of an SMB session. In such cases, we should be prepared to move on and search for the server struct in the next session. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 6 ++++++ fs/smb/client/sess.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 01c49b29c4996..d03253f8f1455 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -233,6 +233,12 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) { /* check if iface is still active */ spin_lock(&ses->chan_lock); + if (cifs_ses_get_chan_index(ses, server) == + CIFS_INVAL_CHAN_INDEX) { + spin_unlock(&ses->chan_lock); + continue; + } + if (!cifs_chan_is_iface_active(ses, server)) { spin_unlock(&ses->chan_lock); cifs_chan_update_iface(ses, server); diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index 476d54fceb50f..8f37373fd3334 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -88,7 +88,6 @@ cifs_ses_get_chan_index(struct cifs_ses *ses, if (server) cifs_dbg(VFS, "unable to get chan index for server: 0x%llx", server->conn_id); - WARN_ON(1); return CIFS_INVAL_CHAN_INDEX; } -- GitLab From a5cc98eba2592d6e3c5a4351319595ddde2a5901 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 Feb 2024 23:57:18 -0600 Subject: [PATCH 0898/1405] smb3: clarify mount warning When a user tries to use the "sec=krb5p" mount parameter to encrypt data on connection to a server (when authenticating with Kerberos), we indicate that it is not supported, but do not note the equivalent recommended mount parameter ("sec=krb5,seal") which turns on encryption for that mount (and uses Kerberos for auth). Update the warning message. Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 52cbef2eeb28f..aec8dbd1f9dbd 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -211,7 +211,7 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c switch (match_token(value, cifs_secflavor_tokens, args)) { case Opt_sec_krb5p: - cifs_errorf(fc, "sec=krb5p is not supported!\n"); + cifs_errorf(fc, "sec=krb5p is not supported. Use sec=krb5,seal instead\n"); return 1; case Opt_sec_krb5i: ctx->sign = true; -- GitLab From 6e2f90d31fe09f2b852de25125ca875aabd81367 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 7 Feb 2024 08:24:15 -0500 Subject: [PATCH 0899/1405] net: openvswitch: limit the number of recursions from action sets The ovs module allows for some actions to recursively contain an action list for complex scenarios, such as sampling, checking lengths, etc. When these actions are copied into the internal flow table, they are evaluated to validate that such actions make sense, and these calls happen recursively. The ovs-vswitchd userspace won't emit more than 16 recursion levels deep. However, the module has no such limit and will happily accept limits larger than 16 levels nested. Prevent this by tracking the number of recursions happening and manually limiting it to 16 levels nested. The initial implementation of the sample action would track this depth and prevent more than 3 levels of recursion, but this was removed to support the clone use case, rather than limited at the current userspace limit. Fixes: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Aaron Conole Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240207132416.1488485-2-aconole@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/flow_netlink.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 88965e2068ac6..ebc5728aab4ea 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); -- GitLab From bd128f62c365504e1268dc09fcccdfb1f091e93a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 7 Feb 2024 08:24:16 -0500 Subject: [PATCH 0900/1405] selftests: openvswitch: Add validation for the recursion test Add a test case into the netlink checks that will show the number of nested action recursions won't exceed 16. Going to 17 on a small clone call isn't enough to exhaust the stack on (most) systems, so it should be safe to run even on systems that don't have the fix applied. Signed-off-by: Aaron Conole Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240207132416.1488485-3-aconole@redhat.com Signed-off-by: Jakub Kicinski --- .../selftests/net/openvswitch/openvswitch.sh | 13 ++++ .../selftests/net/openvswitch/ovs-dpctl.py | 71 +++++++++++++++---- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index f8499d4c87f3f..36e40256ab92a 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -502,7 +502,20 @@ test_netlink_checks () { wc -l) == 2 ] || \ return 1 + info "Checking clone depth" ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x800),ipv4()' \ + 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \ + >/dev/null 2>&1 && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - clone depth too large" + return 1 + fi + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") ovs_add_flow "test_netlink_checks" nv0 \ 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index b97e621face95..5e0e539a323d5 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -299,7 +299,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_PUSH_NSH", "none"), ("OVS_ACTION_ATTR_POP_NSH", "flag"), ("OVS_ACTION_ATTR_METER", "none"), - ("OVS_ACTION_ATTR_CLONE", "none"), + ("OVS_ACTION_ATTR_CLONE", "recursive"), ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), @@ -465,29 +465,42 @@ class ovsactions(nla): print_str += "pop_mpls" else: datum = self.get_attr(field[0]) - print_str += datum.dpstr(more) + if field[0] == "OVS_ACTION_ATTR_CLONE": + print_str += "clone(" + print_str += datum.dpstr(more) + print_str += ")" + else: + print_str += datum.dpstr(more) return print_str def parse(self, actstr): + totallen = len(actstr) while len(actstr) != 0: parsed = False + parencount = 0 if actstr.startswith("drop"): # If no reason is provided, the implicit drop is used (i.e no # action). If some reason is given, an explicit action is used. - actstr, reason = parse_extract_field( - actstr, - "drop(", - "([0-9]+)", - lambda x: int(x, 0), - False, - None, - ) + reason = None + if actstr.startswith("drop("): + parencount += 1 + + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + if reason is not None: self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) parsed = True else: - return + actstr = actstr[len("drop"): ] + return (totallen - len(actstr)) elif parse_starts_block(actstr, "^(\d+)", False, True): actstr, output = parse_extract_field( @@ -504,6 +517,7 @@ class ovsactions(nla): False, 0, ) + parencount += 1 self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid]) parsed = True @@ -516,12 +530,22 @@ class ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): - actstr += len(flat_act[0]) + actstr = actstr[len(flat_act[0]):] self["attrs"].append([flat_act[1]]) actstr = actstr[strspn(actstr, ", ") :] parsed = True - if parse_starts_block(actstr, "ct(", False): + if parse_starts_block(actstr, "clone(", False): + parencount += 1 + subacts = ovsactions() + actstr = actstr[len("clone("):] + parsedLen = subacts.parse(actstr) + lst = [] + self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) + actstr = actstr[parsedLen:] + parsed = True + elif parse_starts_block(actstr, "ct(", False): + parencount += 1 actstr = actstr[len("ct(") :] ctact = ovsactions.ctact() @@ -553,6 +577,7 @@ class ovsactions(nla): natact = ovsactions.ctact.natattr() if actstr.startswith("("): + parencount += 1 t = None actstr = actstr[1:] if actstr.startswith("src"): @@ -607,15 +632,29 @@ class ovsactions(nla): actstr = actstr[strspn(actstr, ", ") :] ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact]) - actstr = actstr[strspn(actstr, ",) ") :] + actstr = actstr[strspn(actstr, ", ") :] self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) parsed = True - actstr = actstr[strspn(actstr, "), ") :] + actstr = actstr[strspn(actstr, ", ") :] + while parencount > 0: + parencount -= 1 + actstr = actstr[strspn(actstr, " "):] + if len(actstr) and actstr[0] != ")": + raise ValueError("Action str: '%s' unbalanced" % actstr) + actstr = actstr[1:] + + if len(actstr) and actstr[0] == ")": + return (totallen - len(actstr)) + + actstr = actstr[strspn(actstr, ", ") :] + if not parsed: raise ValueError("Action str: '%s' not supported" % actstr) + return (totallen - len(actstr)) + class ovskey(nla): nla_flags = NLA_F_NESTED @@ -2111,6 +2150,8 @@ def main(argv): ovsflow = OvsFlow() ndb = NDB() + sys.setrecursionlimit(100000) + if hasattr(args, "showdp"): found = False for iface in ndb.interfaces: -- GitLab From aae09a6c7783e28d1bcafee85e172fe411923b22 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Wed, 7 Feb 2024 19:29:02 -0300 Subject: [PATCH 0901/1405] net/sched: act_mirred: Don't zero blockid when net device is being deleted While testing tdc with parallel tests for mirred to block we caught an intermittent bug. The blockid was being zeroed out when a net device was deleted and, thus, giving us an incorrect blockid value whenever we tried to dump the mirred action. Since we don't increment the block refcount in the control path (and only use the ID), we don't need to zero the blockid field whenever a net device is going down. Fixes: 42f39036cda8 ("net/sched: act_mirred: Allow mirred to block") Signed-off-by: Victor Nogueira Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240207222902.1469398-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/act_mirred.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 12386f590b0f6..0a1a9e40f2370 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -533,8 +533,6 @@ static int mirred_device_event(struct notifier_block *unused, * net_device are already rcu protected. */ RCU_INIT_POINTER(m->tcfm_dev, NULL); - } else if (m->tcfm_blockid) { - m->tcfm_blockid = 0; } spin_unlock_bh(&m->tcf_lock); } -- GitLab From 15faa1f67ab405d47789d4702f587ec7df7ef03e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 6 Feb 2024 13:30:54 +0100 Subject: [PATCH 0902/1405] lan966x: Fix crash when adding interface under a lag There is a crash when adding one of the lan966x interfaces under a lag interface. The issue can be reproduced like this: ip link add name bond0 type bond miimon 100 mode balance-xor ip link set dev eth0 master bond0 The reason is because when adding a interface under the lag it would go through all the ports and try to figure out which other ports are under that lag interface. And the issue is that lan966x can have ports that are NULL pointer as they are not probed. So then iterating over these ports it would just crash as they are NULL pointers. The fix consists in actually checking for NULL pointers before accessing something from the ports. Like we do in other places. Fixes: cabc9d49333d ("net: lan966x: Add lag support for lan966x") Signed-off-by: Horatiu Vultur Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240206123054.3052966-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_lag.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c index 41fa2523d91d3..5f2cd9a8cf8fb 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_lag.c @@ -37,19 +37,24 @@ static void lan966x_lag_set_aggr_pgids(struct lan966x *lan966x) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < lan966x->num_phys_ports; ++lag) { - struct net_device *bond = lan966x->ports[lag]->bond; + struct lan966x_port *port = lan966x->ports[lag]; int num_active_ports = 0; + struct net_device *bond; unsigned long bond_mask; u8 aggr_idx[16]; - if (!bond || (visited & BIT(lag))) + if (!port || !port->bond || (visited & BIT(lag))) continue; + bond = port->bond; bond_mask = lan966x_lag_get_mask(lan966x, bond); for_each_set_bit(p, &bond_mask, lan966x->num_phys_ports) { struct lan966x_port *port = lan966x->ports[p]; + if (!port) + continue; + lan_wr(ANA_PGID_PGID_SET(bond_mask), lan966x, ANA_PGID(p)); if (port->lag_tx_active) -- GitLab From 2599bb5e0c742ba3de1af2abb56b8a103a671a22 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:36 -0800 Subject: [PATCH 0903/1405] net: fill in MODULE_DESCRIPTION()s for xfrm W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the XFRM interface drivers. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/xfrm/xfrm_algo.c | 1 + net/xfrm/xfrm_user.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 41533c6314314..e6da7e8495c9c 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -858,4 +858,5 @@ int xfrm_count_pfkey_enc_supported(void) } EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported); +MODULE_DESCRIPTION("XFRM Algorithm interface"); MODULE_LICENSE("GPL"); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ad01997c3aa9d..f037be190baea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3888,5 +3888,6 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); +MODULE_DESCRIPTION("XFRM User interface"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); -- GitLab From f73f55b0fcff575fef1854c66d18767a341ebbe2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:37 -0800 Subject: [PATCH 0904/1405] net: fill in MODULE_DESCRIPTION()s for mpoa W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Multi-Protocol Over ATM (MPOA) driver. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-3-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/atm/mpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 033871e718a34..324e3ab96bb39 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1532,4 +1532,5 @@ static void __exit atm_mpoa_cleanup(void) module_init(atm_mpoa_init); module_exit(atm_mpoa_cleanup); +MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver"); MODULE_LICENSE("GPL"); -- GitLab From 6e2cf0eb6926a5c51bba0aca819e91d7265c849c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:38 -0800 Subject: [PATCH 0905/1405] net: fill in MODULE_DESCRIPTION()s for af_key W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the PF_KEY socket helpers. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-4-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index d68d01804dc7b..f79fb99271ed8 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3924,5 +3924,6 @@ static int __init ipsec_pfkey_init(void) module_init(ipsec_pfkey_init); module_exit(ipsec_pfkey_exit); +MODULE_DESCRIPTION("PF_KEY socket helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_KEY); -- GitLab From 2898f3075e6a0b0584781272aac88377e5ced0a0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:39 -0800 Subject: [PATCH 0906/1405] net: fill in MODULE_DESCRIPTION()s for 6LoWPAN W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to IPv6 over Low power Wireless Personal Area Network. Signed-off-by: Breno Leitao Acked-by: Alexander Aring Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-5-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/6lowpan/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 7b3341cef926e..850d4a185f55f 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -179,4 +179,5 @@ static void __exit lowpan_module_exit(void) module_init(lowpan_module_init); module_exit(lowpan_module_exit); +MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module"); MODULE_LICENSE("GPL"); -- GitLab From 92ab08eb63bbf54caebb425ed8908758c98ae8f2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:40 -0800 Subject: [PATCH 0907/1405] net: fill in MODULE_DESCRIPTION()s for ipv6 modules W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the IPv6 modules. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-6-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv6/ah6.c | 1 + net/ipv6/esp6.c | 1 + net/ipv6/ip6_udp_tunnel.c | 1 + net/ipv6/mip6.c | 1 + net/ipv6/sit.c | 1 + net/ipv6/tunnel6.c | 1 + net/ipv6/xfrm6_tunnel.c | 1 + 7 files changed, 7 insertions(+) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2016e90e6e1d2..eb474f0987ae0 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -800,5 +800,6 @@ static void __exit ah6_fini(void) module_init(ah6_init); module_exit(ah6_fini); +MODULE_DESCRIPTION("IPv6 AH transformation helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2cc1a45742d82..6e6efe026cdcc 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -1301,5 +1301,6 @@ static void __exit esp6_fini(void) module_init(esp6_init); module_exit(esp6_fini); +MODULE_DESCRIPTION("IPv6 ESP transformation helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index a7bf0327b380b..c99053189ea8a 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -182,4 +182,5 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup); +MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 83d2a8be263fb..6a16a5bd0d910 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -405,6 +405,7 @@ static void __exit mip6_fini(void) module_init(mip6_init); module_exit(mip6_fini); +MODULE_DESCRIPTION("IPv6 Mobility driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cc24cefdb85c0..5e9f625b76e36 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1956,6 +1956,7 @@ static int __init sit_init(void) module_init(sit_init); module_exit(sit_cleanup); +MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 00e8d8b1c9a75..dc4ea9b11794e 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -302,4 +302,5 @@ static void __exit tunnel6_fini(void) module_init(tunnel6_init); module_exit(tunnel6_fini); +MODULE_DESCRIPTION("IP-in-IPv6 tunnel driver"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 1323f2f6928e2..f6cb94f82cc3a 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -401,5 +401,6 @@ static void __exit xfrm6_tunnel_fini(void) module_init(xfrm6_tunnel_init); module_exit(xfrm6_tunnel_fini); +MODULE_DESCRIPTION("IPv6 XFRM tunnel driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6); -- GitLab From b058a5d25d921af2be83d70844d389ecfd4a0497 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:41 -0800 Subject: [PATCH 0908/1405] net: fill in MODULE_DESCRIPTION()s for ipv4 modules W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the IPv4 modules. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-7-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/ipv4/ah4.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/ip_gre.c | 1 + net/ipv4/ip_tunnel.c | 1 + net/ipv4/ip_vti.c | 1 + net/ipv4/ipip.c | 1 + net/ipv4/tunnel4.c | 1 + net/ipv4/udp_tunnel_core.c | 1 + net/ipv4/xfrm4_tunnel.c | 1 + 9 files changed, 9 insertions(+) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index a2e6e1fdf82be..64aec3dff8ec8 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -597,5 +597,6 @@ static void __exit ah4_fini(void) module_init(ah4_init); module_exit(ah4_fini); +MODULE_DESCRIPTION("IPv4 AH transformation library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4ccfc104f13a5..4dd9e50406720 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1247,5 +1247,6 @@ static void __exit esp4_fini(void) module_init(esp4_init); module_exit(esp4_fini); +MODULE_DESCRIPTION("IPv4 ESP transformation library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5169c3c72cffe..6b9cf5a24c19f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1793,6 +1793,7 @@ static void __exit ipgre_fini(void) module_init(ipgre_init); module_exit(ipgre_fini); +MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index beeae624c412d..a4513ffb66cbb 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1298,4 +1298,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id) } EXPORT_SYMBOL_GPL(ip_tunnel_setup); +MODULE_DESCRIPTION("IPv4 tunnel implementation library"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9ab9b3ebe0cd1..d1d6bb28ed6e9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -721,6 +721,7 @@ static void __exit vti_fini(void) module_init(vti_init); module_exit(vti_fini); +MODULE_DESCRIPTION("Virtual (secure) IP tunneling library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("vti"); MODULE_ALIAS_NETDEV("ip_vti0"); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 27b8f83c6ea20..03afa3871efc5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -658,6 +658,7 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); +MODULE_DESCRIPTION("IP/IP protocol decoder library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ipip"); MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 5048c47c79b28..4c1f836aae38b 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -294,4 +294,5 @@ static void __exit tunnel4_fini(void) module_init(tunnel4_init); module_exit(tunnel4_fini); +MODULE_DESCRIPTION("IPv4 XFRM tunnel library"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index a87defb2b1672..860aff5f85990 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -253,4 +253,5 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, } EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup); +MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 8489fa1065837..8cb266af13931 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -114,5 +114,6 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); +MODULE_DESCRIPTION("IPv4 XFRM tunnel driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP); -- GitLab From a46c31bf2744b9807ba5e3ac8fdae2368d8bb3fa Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:42 -0800 Subject: [PATCH 0909/1405] net: fill in MODULE_DESCRIPTION()s for net/sched W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the network schedulers. Suggested-by: Jamal Hadi Salim Signed-off-by: Breno Leitao Reviewed-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240208164244.3818498-8-leitao@debian.org Signed-off-by: Jakub Kicinski --- net/sched/em_canid.c | 1 + net/sched/em_cmp.c | 1 + net/sched/em_meta.c | 1 + net/sched/em_nbyte.c | 1 + net/sched/em_text.c | 1 + net/sched/em_u32.c | 1 + 6 files changed, 6 insertions(+) diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c index 5ea84decec19a..5337bc4627551 100644 --- a/net/sched/em_canid.c +++ b/net/sched/em_canid.c @@ -222,6 +222,7 @@ static void __exit exit_em_canid(void) tcf_em_unregister(&em_canid_ops); } +MODULE_DESCRIPTION("ematch classifier to match CAN IDs embedded in skb CAN frames"); MODULE_LICENSE("GPL"); module_init(init_em_canid); diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index f17b049ea5309..c90ad7ea26b46 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -87,6 +87,7 @@ static void __exit exit_em_cmp(void) tcf_em_unregister(&em_cmp_ops); } +MODULE_DESCRIPTION("ematch classifier for basic data types(8/16/32 bit) against skb data"); MODULE_LICENSE("GPL"); module_init(init_em_cmp); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 09d8afd04a2a7..8996c73c9779b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -1006,6 +1006,7 @@ static void __exit exit_em_meta(void) tcf_em_unregister(&em_meta_ops); } +MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata"); MODULE_LICENSE("GPL"); module_init(init_em_meta); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index a83b237cbeb06..4f9f21a05d5e4 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -68,6 +68,7 @@ static void __exit exit_em_nbyte(void) tcf_em_unregister(&em_nbyte_ops); } +MODULE_DESCRIPTION("ematch classifier for arbitrary skb multi-bytes"); MODULE_LICENSE("GPL"); module_init(init_em_nbyte); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index f176afb70559e..420c66203b177 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -147,6 +147,7 @@ static void __exit exit_em_text(void) tcf_em_unregister(&em_text_ops); } +MODULE_DESCRIPTION("ematch classifier for embedded text in skbs"); MODULE_LICENSE("GPL"); module_init(init_em_text); diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 71b070da04379..fdec4db5ec89d 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -52,6 +52,7 @@ static void __exit exit_em_u32(void) tcf_em_unregister(&em_u32_ops); } +MODULE_DESCRIPTION("ematch skb classifier using 32 bit chunks of data"); MODULE_LICENSE("GPL"); module_init(init_em_u32); -- GitLab From 830bd88cc151412f467423c3db0b29fe623c6487 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:43 -0800 Subject: [PATCH 0910/1405] net: fill in MODULE_DESCRIPTION()s for ipvtap W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the IP-VLAN based tap driver. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240208164244.3818498-9-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvtap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 60944a4beadae..1afc4c47be73f 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -237,4 +237,5 @@ static void __exit ipvtap_exit(void) module_exit(ipvtap_exit); MODULE_ALIAS_RTNL_LINK("ipvtap"); MODULE_AUTHOR("Sainath Grandhi "); +MODULE_DESCRIPTION("IP-VLAN based tap driver"); MODULE_LICENSE("GPL"); -- GitLab From 6034e059f5d34bceb4adefc3409043bf65c896ea Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 8 Feb 2024 08:42:44 -0800 Subject: [PATCH 0911/1405] net: fill in MODULE_DESCRIPTION()s for dsa_loop_bdinfo W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the DSA loopback fixed PHY module. Suggested-by: Florian Fainelli Signed-off-by: Breno Leitao Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20240208164244.3818498-10-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop_bdinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/dsa_loop_bdinfo.c b/drivers/net/dsa/dsa_loop_bdinfo.c index 237066d307044..14ca42491512c 100644 --- a/drivers/net/dsa/dsa_loop_bdinfo.c +++ b/drivers/net/dsa/dsa_loop_bdinfo.c @@ -32,4 +32,5 @@ static int __init dsa_loop_bdinfo_init(void) } arch_initcall(dsa_loop_bdinfo_init) +MODULE_DESCRIPTION("DSA mock-up switch driver"); MODULE_LICENSE("GPL"); -- GitLab From 4356e9f841f7fbb945521cef3577ba394c65f3fc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Feb 2024 12:39:31 -0800 Subject: [PATCH 0912/1405] work around gcc bugs with 'asm goto' with outputs We've had issues with gcc and 'asm goto' before, and we created a 'asm_volatile_goto()' macro for that in the past: see commits 3f0116c3238a ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug") and a9f180345f53 ("compiler/gcc4: Make quirk for asm_volatile_goto() unconditional"). Then, much later, we ended up removing the workaround in commit 43c249ea0b1e ("compiler-gcc.h: remove ancient workaround for gcc PR 58670") because we no longer supported building the kernel with the affected gcc versions, but we left the macro uses around. Now, Sean Christopherson reports a new version of a very similar problem, which is fixed by re-applying that ancient workaround. But the problem in question is limited to only the 'asm goto with outputs' cases, so instead of re-introducing the old workaround as-is, let's rename and limit the workaround to just that much less common case. It looks like there are at least two separate issues that all hit in this area: (a) some versions of gcc don't mark the asm goto as 'volatile' when it has outputs: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 which is easy to work around by just adding the 'volatile' by hand. (b) Internal compiler errors: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 which are worked around by adding the extra empty 'asm' as a barrier, as in the original workaround. but the problem Sean sees may be a third thing since it involves bad code generation (not an ICE) even with the manually added 'volatile'. but the same old workaround works for this case, even if this feels a bit like voodoo programming and may only be hiding the issue. Reported-and-tested-by: Sean Christopherson Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Cc: Nick Desaulniers Cc: Uros Bizjak Cc: Jakub Jelinek Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arm/include/asm/jump_label.h | 4 ++-- arch/arm64/include/asm/alternative-macros.h | 4 ++-- arch/arm64/include/asm/jump_label.h | 4 ++-- arch/csky/include/asm/jump_label.h | 4 ++-- arch/loongarch/include/asm/jump_label.h | 4 ++-- arch/mips/include/asm/jump_label.h | 4 ++-- arch/parisc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/jump_label.h | 4 ++-- arch/powerpc/include/asm/uaccess.h | 12 ++++++------ arch/powerpc/kernel/irq_64.c | 2 +- arch/riscv/include/asm/arch_hweight.h | 4 ++-- arch/riscv/include/asm/bitops.h | 8 ++++---- arch/riscv/include/asm/checksum.h | 2 +- arch/riscv/include/asm/cpufeature.h | 4 ++-- arch/riscv/include/asm/jump_label.h | 4 ++-- arch/riscv/lib/csum.c | 10 +++++----- arch/s390/include/asm/jump_label.h | 4 ++-- arch/sparc/include/asm/jump_label.h | 4 ++-- arch/um/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +++--- arch/x86/include/asm/rmwcc.h | 2 +- arch/x86/include/asm/special_insns.h | 2 +- arch/x86/include/asm/uaccess.h | 10 +++++----- arch/x86/kvm/svm/svm_ops.h | 6 +++--- arch/x86/kvm/vmx/vmx.c | 4 ++-- arch/x86/kvm/vmx/vmx_ops.h | 6 +++--- arch/xtensa/include/asm/jump_label.h | 4 ++-- include/linux/compiler-gcc.h | 19 +++++++++++++++++++ include/linux/compiler_types.h | 4 ++-- net/netfilter/nft_set_pipapo_avx2.c | 2 +- samples/bpf/asm_goto_workaround.h | 8 ++++---- tools/arch/x86/include/asm/rmwcc.h | 2 +- tools/include/linux/compiler_types.h | 4 ++-- 35 files changed, 96 insertions(+), 77 deletions(-) diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index 9d96180797396..a339223d9e052 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -31,7 +31,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "nop \n" ".pushsection __jump_table, \"aw\" \n" @@ -47,7 +47,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" + asm goto(".balign "__stringify(JUMP_LABEL_NOP_SIZE)" \n" "1: \n" "b %l[l_yes] \n" ".pushsection __jump_table, \"aw\" \n" diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc03..e4eb54f6cd9fe 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -11,7 +11,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(nop) "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -25,7 +25,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 210bb43cff2c7..d328f549b1a60 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -229,7 +229,7 @@ alternative_has_cap_likely(const unsigned long cpucap) if (!cpucap_is_possible(cpucap)) return false; - asm_volatile_goto( + asm goto( ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) : : [cpucap] "i" (cpucap) @@ -247,7 +247,7 @@ alternative_has_cap_unlikely(const unsigned long cpucap) if (!cpucap_is_possible(cpucap)) return false; - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap]) : : [cpucap] "i" (cpucap) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 48ddc0f45d228..6aafbb7899916 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" @@ -35,7 +35,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" diff --git a/arch/csky/include/asm/jump_label.h b/arch/csky/include/asm/jump_label.h index 98a3f4b168bd2..ef2e37a10a0fe 100644 --- a/arch/csky/include/asm/jump_label.h +++ b/arch/csky/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: nop32 \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" @@ -29,7 +29,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto( + asm goto( "1: bsr32 %l[label] \n" " .pushsection __jump_table, \"aw\" \n" " .align 2 \n" diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h index 3cea299a5ef58..29acfe3de3faa 100644 --- a/arch/loongarch/include/asm/jump_label.h +++ b/arch/loongarch/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: nop \n\t" JUMP_TABLE_ENTRY : : "i"(&((char *)key)[branch]) : : l_yes); @@ -35,7 +35,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, co static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( "1: b %l[l_yes] \n\t" JUMP_TABLE_ENTRY : : "i"(&((char *)key)[branch]) : : l_yes); diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 081be98c71ef4..ff5d388502d4a 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -39,7 +39,7 @@ extern void jump_label_apply_nops(struct module *mod); static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + asm goto("1:\t" B_INSN " 2f\n\t" "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" @@ -53,7 +53,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" + asm goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index 94428798b6aa6..317ebc5edc9fe 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -12,7 +12,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" @@ -29,7 +29,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align %1\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 93ce3ec253877..2f2a86ed2280a 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop # arch_static_branch\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" @@ -32,7 +32,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes] # arch_static_branch_jump\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".long 1b - ., %l[l_yes] - .\n\t" diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index f1f9890f50d3e..de10437fd2065 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -74,7 +74,7 @@ __pu_failed: \ /* -mprefixed can generate offsets beyond range, fall back hack */ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op " %0,0(%1) # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -83,7 +83,7 @@ __pu_failed: \ : label) #else #define __put_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -97,7 +97,7 @@ __pu_failed: \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ @@ -146,7 +146,7 @@ do { \ /* -mprefixed can generate offsets beyond range, fall back hack */ #ifdef CONFIG_PPC_KERNEL_PREFIXED #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op" %0,0(%1) # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -155,7 +155,7 @@ do { \ : label) #else #define __get_user_asm_goto(x, addr, label, op) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: "op"%U1%X1 %0, %1 # get_user\n" \ EX_TABLE(1b, %l2) \ : "=r" (x) \ @@ -169,7 +169,7 @@ do { \ __get_user_asm_goto(x, addr, label, "ld") #else /* __powerpc64__ */ #define __get_user_asm2_goto(x, addr, label) \ - asm_volatile_goto( \ + asm_goto_output( \ "1: lwz%X1 %0, %1\n" \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index 938e66829eae6..d5c48d1b0a31e 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -230,7 +230,7 @@ notrace __no_kcsan void arch_local_irq_restore(unsigned long mask) * This allows interrupts to be unmasked without hard disabling, and * also without new hard interrupts coming in ahead of pending ones. */ - asm_volatile_goto( + asm goto( "1: \n" " lbz 9,%0(13) \n" " cmpwi 9,0 \n" diff --git a/arch/riscv/include/asm/arch_hweight.h b/arch/riscv/include/asm/arch_hweight.h index c20236a0725b9..85b2c443823e8 100644 --- a/arch/riscv/include/asm/arch_hweight.h +++ b/arch/riscv/include/asm/arch_hweight.h @@ -20,7 +20,7 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w) { #ifdef CONFIG_RISCV_ISA_ZBB - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -51,7 +51,7 @@ static inline unsigned int __arch_hweight8(unsigned int w) static __always_inline unsigned long __arch_hweight64(__u64 w) { # ifdef CONFIG_RISCV_ISA_ZBB - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 9ffc355370248..329d8244a9b3f 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -39,7 +39,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word) { int num; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -95,7 +95,7 @@ static __always_inline unsigned long variable__fls(unsigned long word) { int num; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -154,7 +154,7 @@ static __always_inline int variable_ffs(int x) if (!x) return 0; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); @@ -209,7 +209,7 @@ static __always_inline int variable_fls(unsigned int x) if (!x) return 0; - asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : : : legacy); diff --git a/arch/riscv/include/asm/checksum.h b/arch/riscv/include/asm/checksum.h index a5b60b54b101c..88e6f1499e889 100644 --- a/arch/riscv/include/asm/checksum.h +++ b/arch/riscv/include/asm/checksum.h @@ -53,7 +53,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { unsigned long fold_temp; - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 5a626ed2c47a8..0bd11862b7607 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -80,7 +80,7 @@ riscv_has_extension_likely(const unsigned long ext) "ext must be < RISCV_ISA_EXT_MAX"); if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( + asm goto( ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) : : [ext] "i" (ext) @@ -103,7 +103,7 @@ riscv_has_extension_unlikely(const unsigned long ext) "ext must be < RISCV_ISA_EXT_MAX"); if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm_volatile_goto( + asm goto( ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) : : [ext] "i" (ext) diff --git a/arch/riscv/include/asm/jump_label.h b/arch/riscv/include/asm/jump_label.h index 14a5ea8d8ef0f..4a35d787c0191 100644 --- a/arch/riscv/include/asm/jump_label.h +++ b/arch/riscv/include/asm/jump_label.h @@ -17,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" @@ -39,7 +39,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto( + asm goto( " .align 2 \n\t" " .option push \n\t" " .option norelax \n\t" diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c index af3df5274ccba..74af3ab520b6d 100644 --- a/arch/riscv/lib/csum.c +++ b/arch/riscv/lib/csum.c @@ -53,7 +53,7 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : @@ -170,7 +170,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : @@ -178,7 +178,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) : no_zbb); #ifdef CONFIG_32BIT - asm_volatile_goto(".option push \n\ + asm_goto_output(".option push \n\ .option arch,+zbb \n\ rori %[fold_temp], %[csum], 16 \n\ andi %[offset], %[offset], 1 \n\ @@ -193,7 +193,7 @@ do_csum_with_alignment(const unsigned char *buff, int len) return (unsigned short)csum; #else /* !CONFIG_32BIT */ - asm_volatile_goto(".option push \n\ + asm_goto_output(".option push \n\ .option arch,+zbb \n\ rori %[fold_temp], %[csum], 32 \n\ add %[csum], %[fold_temp], %[csum] \n\ @@ -257,7 +257,7 @@ do_csum_no_alignment(const unsigned char *buff, int len) * support, so nop when Zbb is available and jump when Zbb is * not available. */ - asm_volatile_goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, + asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0, RISCV_ISA_EXT_ZBB, 1) : : diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 895f774bbcc55..bf78cf381dfcd 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -25,7 +25,7 @@ */ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 0,%l[label]\n" + asm goto("0: brcl 0,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" @@ -39,7 +39,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("0: brcl 15,%l[label]\n" + asm goto("0: brcl 15,%l[label]\n" ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 94eb529dcb776..2718cbea826a7 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -10,7 +10,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" @@ -26,7 +26,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "b %l[l_yes]\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h index 4b6d1b526bc12..66fe06db872f0 100644 --- a/arch/um/include/asm/cpufeature.h +++ b/arch/um/include/asm/cpufeature.h @@ -75,7 +75,7 @@ extern void setup_clear_cpu_cap(unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto("1: jmp 6f\n" + asm goto("1: jmp 6f\n" "2:\n" ".skip -(((5f-4f) - (2b-1b)) > 0) * " "((5f-4f) - (2b-1b)),0x90\n" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a26bebbdff87e..a1273698fc430 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); */ static __always_inline bool _static_cpu_has(u16 bit) { - asm_volatile_goto( + asm goto( ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 071572e23d3a0..cbbef32517f00 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -24,7 +24,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes] # objtool NOPs this \n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (2 | branch) : : l_yes); @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); @@ -52,7 +52,7 @@ static __always_inline bool arch_static_branch(struct static_key * const key, co static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { - asm_volatile_goto("1:" + asm goto("1:" "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 4b081e0d3306b..363266cbcadaf 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -13,7 +13,7 @@ #define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \ ({ \ bool c = false; \ - asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ + asm goto (fullop "; j" #cc " %l[cc_label]" \ : : [var] "m" (_var), ## __VA_ARGS__ \ : clobbers : cc_label); \ if (0) { \ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index d6cd9344f6c78..48f8dd47cf688 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p) #ifdef CONFIG_X86_USER_SHADOW_STACK static inline int write_user_shstk_64(u64 __user *addr, u64 val) { - asm_volatile_goto("1: wrussq %[val], (%[addr])\n" + asm goto("1: wrussq %[val], (%[addr])\n" _ASM_EXTABLE(1b, %l[fail]) :: [addr] "r" (addr), [val] "r" (val) :: fail); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5c367c1290c35..237dc8cdd12b9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -133,7 +133,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_goto_u64(x, addr, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ _ASM_EXTABLE_UA(1b, %l2) \ @@ -295,7 +295,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: mov"itype" %[umem],%[output]\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : [output] ltype(x) \ @@ -375,7 +375,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -394,7 +394,7 @@ do { \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ - asm_volatile_goto("\n" \ + asm_goto_output("\n" \ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ @@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_goto(x, addr, itype, ltype, label) \ - asm_volatile_goto("\n" \ + asm goto("\n" \ "1: mov"itype" %0,%1\n" \ _ASM_EXTABLE_UA(1b, %l2) \ : : ltype(x), "m" (__m(addr)) \ diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h index 36c8af87a707a..4e725854c63a1 100644 --- a/arch/x86/kvm/svm/svm_ops.h +++ b/arch/x86/kvm/svm/svm_ops.h @@ -8,7 +8,7 @@ #define svm_asm(insn, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) "\n\t" \ + asm goto("1: " __stringify(insn) "\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ ::: clobber : fault); \ return; \ @@ -18,7 +18,7 @@ fault: \ #define svm_asm1(insn, op1, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1 : clobber : fault); \ return; \ @@ -28,7 +28,7 @@ fault: \ #define svm_asm2(insn, op1, op2, clobber...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ :: op1, op2 : clobber : fault); \ return; \ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e262bc2ba4e56..1111d9d089038 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -738,7 +738,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, */ static int kvm_cpu_vmxoff(void) { - asm_volatile_goto("1: vmxoff\n\t" + asm goto("1: vmxoff\n\t" _ASM_EXTABLE(1b, %l[fault]) ::: "cc", "memory" : fault); @@ -2784,7 +2784,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer) cr4_set_bits(X86_CR4_VMXE); - asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t" + asm goto("1: vmxon %[vmxon_pointer]\n\t" _ASM_EXTABLE(1b, %l[fault]) : : [vmxon_pointer] "m"(vmxon_pointer) : : fault); diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index f41ce3c24123a..8060e5fc6dbd8 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT - asm_volatile_goto("1: vmread %[field], %[output]\n\t" + asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) @@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) #define vmx_asm1(insn, op1, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ @@ -205,7 +205,7 @@ fault: \ #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ - asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ diff --git a/arch/xtensa/include/asm/jump_label.h b/arch/xtensa/include/asm/jump_label.h index c812bf85021c0..46c8596259d2d 100644 --- a/arch/xtensa/include/asm/jump_label.h +++ b/arch/xtensa/include/asm/jump_label.h @@ -13,7 +13,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" "_nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" @@ -38,7 +38,7 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, * make it reachable and wrap both into a no-transform block * to avoid any assembler interference with this. */ - asm_volatile_goto("1:\n\t" + asm goto("1:\n\t" ".begin no-transform\n\t" "_j %l[l_yes]\n\t" "2:\n\t" diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index aebb65bf95a79..c1a963be7d289 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -64,6 +64,25 @@ __builtin_unreachable(); \ } while (0) +/* + * GCC 'asm goto' with outputs miscompiles certain code sequences: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * + * Work it around via the same compiler barrier quirk that we used + * to use for the old 'asm goto' workaround. + * + * Also, always mark such 'asm goto' statements as volatile: all + * asm goto statements are supposed to be volatile as per the + * documentation, but some versions of gcc didn't actually do + * that for asms with outputs: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 + */ +#define asm_goto_output(x...) \ + do { asm volatile goto(x); asm (""); } while (0) + #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6f1ca49306d2f..663d8791c871a 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,8 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 90e275bb3e5d7..a3a8ddca99189 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594d65..634e81d83efd9 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h index 11ff975242cac..e2ff22b379a44 100644 --- a/tools/arch/x86/include/asm/rmwcc.h +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm goto (fullop "; j" cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 1bdd834bdd571..d09f9dc172a48 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -36,8 +36,8 @@ #include #endif -#ifndef asm_volatile_goto -#define asm_volatile_goto(x...) asm goto(x) +#ifndef asm_goto_output +#define asm_goto_output(x...) asm goto(x) #endif #endif /* __LINUX_COMPILER_TYPES_H */ -- GitLab From 7e4a205fe56b9092f0143dad6aa5fee081139b09 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Feb 2024 23:53:05 -0500 Subject: [PATCH 0913/1405] Revert "get rid of DCACHE_GENOCIDE" This reverts commit 57851607326a2beef21e67f83f4f53a90df8445a. Unfortunately, while we only call that thing once, the callback *can* be called more than once for the same dentry - all it takes is rename_lock being touched while we are in d_walk(). For now let's revert it. Signed-off-by: Al Viro --- fs/dcache.c | 5 ++++- include/linux/dcache.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/dcache.c b/fs/dcache.c index b813528fb1477..6ebccba333368 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3061,7 +3061,10 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) if (d_unhashed(dentry) || !dentry->d_inode) return D_WALK_SKIP; - dentry->d_lockref.count--; + if (!(dentry->d_flags & DCACHE_GENOCIDE)) { + dentry->d_flags |= DCACHE_GENOCIDE; + dentry->d_lockref.count--; + } } return D_WALK_CONTINUE; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1666c387861f7..d07cf2f1bb7db 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,6 +173,7 @@ struct dentry_operations { #define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ #define DCACHE_CANT_MOUNT BIT(8) +#define DCACHE_GENOCIDE BIT(9) #define DCACHE_SHRINK_LIST BIT(10) #define DCACHE_OP_WEAK_REVALIDATE BIT(11) -- GitLab From a22b0a2be69a36511cb5b37d948b651ddf7debf3 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 15:20:05 +0200 Subject: [PATCH 0914/1405] iio: adc: ad4130: zero-initialize clock init data The clk_init_data struct does not have all its members initialized, causing issues when trying to expose the internal clock on the CLK pin. Fix this by zero-initializing the clk_init_data struct. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207132007.253768-1-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index feb86fe6c422d..9daeac16499b3 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1821,7 +1821,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st) { struct device *dev = &st->spi->dev; struct device_node *of_node = dev_of_node(dev); - struct clk_init_data init; + struct clk_init_data init = {}; const char *clk_name; int ret; -- GitLab From 78367c32bebfe833cd30c855755d863a4ff3fdee Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Wed, 7 Feb 2024 15:20:06 +0200 Subject: [PATCH 0915/1405] iio: adc: ad4130: only set GPIO_CTRL if pin is unused Currently, GPIO_CTRL bits are set even if the pins are used for measurements. GPIO_CTRL bits should only be set if the pin is not used for other functionality. Fix this by only setting the GPIO_CTRL bits if the pin has no other function. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240207132007.253768-2-demonsingur@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 9daeac16499b3..62490424b6aed 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1891,10 +1891,14 @@ static int ad4130_setup(struct iio_dev *indio_dev) return ret; /* - * Configure all GPIOs for output. If configured, the interrupt function - * of P2 takes priority over the GPIO out function. + * Configure unused GPIOs for output. If configured, the interrupt + * function of P2 takes priority over the GPIO out function. */ - val = AD4130_IO_CONTROL_GPIO_CTRL_MASK; + val = 0; + for (i = 0; i < AD4130_MAX_GPIOS; i++) + if (st->pins_fn[i + AD4130_AIN2_P1] == AD4130_PIN_FN_NONE) + val |= FIELD_PREP(AD4130_IO_CONTROL_GPIO_CTRL_MASK, BIT(i)); + val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel); ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val); -- GitLab From c57ca512f3b68ddcd62bda9cc24a8f5584ab01b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:18 -0800 Subject: [PATCH 0916/1405] net: tls: factor out tls_*crypt_async_wait() Factor out waiting for async encrypt and decrypt to finish. There are already multiple copies and a subsequent fix will need more. No functional changes. Note that crypto_wait_req() returns wait->err Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 96 +++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 31e8a94dfc111..6a73714f34cc4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -230,6 +230,20 @@ static void tls_decrypt_done(void *data, int err) spin_unlock_bh(&ctx->decrypt_compl_lock); } +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->decrypt_compl_lock); + reinit_completion(&ctx->async_wait.completion); + pending = atomic_read(&ctx->decrypt_pending); + spin_unlock_bh(&ctx->decrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + + return ctx->async_wait.err; +} + static int tls_do_decryption(struct sock *sk, struct scatterlist *sgin, struct scatterlist *sgout, @@ -495,6 +509,28 @@ static void tls_encrypt_done(void *data, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + int pending; + + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + if (pending) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + else + reinit_completion(&ctx->async_wait.completion); + + /* There can be no concurrent accesses, since we have no + * pending encrypt operations + */ + WRITE_ONCE(ctx->async_notify, false); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -984,7 +1020,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, int num_zc = 0; int orig_size; int ret = 0; - int pending; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; @@ -1163,24 +1198,12 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1229,7 +1252,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1264,22 +1286,7 @@ void tls_sw_splice_eof(struct socket *sock) } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -2109,16 +2116,10 @@ int tls_sw_recvmsg(struct sock *sk, recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2435,16 +2436,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); -- GitLab From aec7961916f3f9e88766e2688992da6980f11b8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:19 -0800 Subject: [PATCH 0917/1405] tls: fix race between async notify and socket close The submitting thread (one which called recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete() so any code past that point risks touching already freed data. Try to avoid the locking and extra flags altogether. Have the main thread hold an extra reference, this way we can depend solely on the atomic ref counter for synchronization. Don't futz with reiniting the completion, either, we are now tightly controlling when completion fires. Reported-by: valis Fixes: 0cada33241d9 ("net/tls: fix race condition causing kernel panic") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 5 ----- net/tls/tls_sw.c | 43 ++++++++++--------------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 962f0c501111b..340ad43971e47 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,9 +97,6 @@ struct tls_sw_context_tx { struct tls_rec *open_rec; struct list_head tx_list; atomic_t encrypt_pending; - /* protect crypto_wait with encrypt_pending */ - spinlock_t encrypt_compl_lock; - int async_notify; u8 async_capable:1; #define BIT_TX_SCHEDULED 0 @@ -136,8 +133,6 @@ struct tls_sw_context_rx { struct tls_strparser strp; atomic_t decrypt_pending; - /* protect crypto_wait with decrypt_pending*/ - spinlock_t decrypt_compl_lock; struct sk_buff_head async_hold; struct wait_queue_head wq; }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6a73714f34cc4..635305bebfef6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -224,22 +224,15 @@ static void tls_decrypt_done(void *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); } static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) { - int pending; - - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->decrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); return ctx->async_wait.err; } @@ -267,6 +260,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -455,7 +449,6 @@ static void tls_encrypt_done(void *data, int err) struct sk_msg *msg_en; bool ready = false; struct sock *sk; - int pending; msg_en = &rec->msg_encrypted; @@ -494,12 +487,8 @@ static void tls_encrypt_done(void *data, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -511,22 +500,9 @@ static void tls_encrypt_done(void *data, int err) static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) { - int pending; - - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) + if (!atomic_dec_and_test(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + atomic_inc(&ctx->encrypt_pending); return ctx->async_wait.err; } @@ -577,6 +553,7 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); @@ -2601,7 +2578,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc } crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); INIT_LIST_HEAD(&sw_ctx_tx->tx_list); INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); sw_ctx_tx->tx_work.sk = sk; @@ -2622,7 +2599,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) } crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); init_waitqueue_head(&sw_ctx_rx->wq); skb_queue_head_init(&sw_ctx_rx->rx_list); skb_queue_head_init(&sw_ctx_rx->async_hold); -- GitLab From e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:20 -0800 Subject: [PATCH 0918/1405] tls: fix race between tx work scheduling and socket close Similarly to previous commit, the submitting thread (recvmsg/sendmsg) may exit as soon as the async crypto handler calls complete(). Reorder scheduling the work before calling complete(). This seems more logical in the first place, as it's the inverse order of what the submitting thread will do. Reported-by: valis Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 635305bebfef6..9374a61cef00b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -447,7 +447,6 @@ static void tls_encrypt_done(void *data, int err) struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; - bool ready = false; struct sock *sk; msg_en = &rec->msg_encrypted; @@ -483,19 +482,16 @@ static void tls_encrypt_done(void *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - - if (!ready) - return; - - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); } static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) -- GitLab From 8590541473188741055d27b955db0777569438e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:21 -0800 Subject: [PATCH 0919/1405] net: tls: handle backlogging of crypto requests Since we're setting the CRYPTO_TFM_REQ_MAY_BACKLOG flag on our requests to the crypto API, crypto_aead_{encrypt,decrypt} can return -EBUSY instead of -EINPROGRESS in valid situations. For example, when the cryptd queue for AESNI is full (easy to trigger with an artificially low cryptd.cryptd_max_cpu_qlen), requests will be enqueued to the backlog but still processed. In that case, the async callback will also be called twice: first with err == -EINPROGRESS, which it seems we can just ignore, then with err == 0. Compared to Sabrina's original patch this version uses the new tls_*crypt_async_wait() helpers and converts the EBUSY to EINPROGRESS to avoid having to modify all the error handling paths. The handling is identical. Fixes: a54667f6728c ("tls: Add support for encryption using async offload accelerator") Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/netdev/9681d1febfec295449a62300938ed2ae66983f28.1694018970.git.sd@queasysnail.net/ Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9374a61cef00b..63bef5666e36d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -196,6 +196,17 @@ static void tls_decrypt_done(void *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -269,6 +280,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -449,6 +464,9 @@ static void tls_encrypt_done(void *data, int err) struct sk_msg *msg_en; struct sock *sk; + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; + msg_en = &rec->msg_encrypted; sk = rec->sk; @@ -553,6 +571,10 @@ static int tls_do_encryption(struct sock *sk, atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; -- GitLab From 32b55c5ff9103b8508c1e04bfa5a08c64e7a925f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 6 Feb 2024 17:18:22 -0800 Subject: [PATCH 0920/1405] net: tls: fix use-after-free with partial reads and async decrypt tls_decrypt_sg doesn't take a reference on the pages from clear_skb, so the put_page() in tls_decrypt_done releases them, and we trigger a use-after-free in process_rx_list when we try to read from the partially-read skb. Fixes: fd31f3996af2 ("tls: rx: decrypt into a fresh skb") Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 63bef5666e36d..a6eff21ade23e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -63,6 +63,7 @@ struct tls_decrypt_ctx { u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -187,7 +188,6 @@ static void tls_decrypt_done(void *data, int err) struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -224,7 +224,7 @@ static void tls_decrypt_done(void *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -1583,6 +1583,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, -- GitLab From 49d821064c44cb5ffdf272905236012ea9ce50e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:23 -0800 Subject: [PATCH 0921/1405] selftests: tls: use exact comparison in recv_partial This exact case was fail for async crypto and we weren't catching it. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/net/tls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 7799e042a9719..bc36c91c4480f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1002,12 +1002,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } -- GitLab From ac437a51ce662364062f704e321227f6728e6adc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Feb 2024 17:18:24 -0800 Subject: [PATCH 0922/1405] net: tls: fix returned read length with async decrypt We double count async, non-zc rx data. The previous fix was lucky because if we fully zc async_copy_bytes is 0 so we add 0. Decrypted already has all the bytes we handled, in all cases. We don't have to adjust anything, delete the erroneous line. Fixes: 4d42cd6bc2ac ("tls: rx: fix return value for async crypto") Co-developed-by: Sabrina Dubroca Signed-off-by: Sabrina Dubroca Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index a6eff21ade23e..9fbc70200cd0f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2132,7 +2132,6 @@ int tls_sw_recvmsg(struct sock *sk, else err = process_rx_list(ctx, msg, &control, 0, async_copy_bytes, is_peek); - decrypted += max(err, 0); } copied += decrypted; -- GitLab From 1a1c93e7f8141749ecb10165a95e95ad484bb85f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 Feb 2024 18:34:05 -0500 Subject: [PATCH 0923/1405] bcachefs: Fix missing bch2_err_class() calls We aren't supposed to be leaking our private error codes outside of fs/bcachefs/. Fixes: Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ec419b8e2c431..77ae65542db91 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -435,7 +435,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, bch2_subvol_is_ro(c, inode->ei_subvol) ?: __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) - return ret; + return bch2_err_class(ret); ihold(&inode->v); d_instantiate(dentry, &inode->v); @@ -487,8 +487,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct bch_inode_info *dir= to_bch_ei(vdir); struct bch_fs *c = dir->v.i_sb->s_fs_info; - return bch2_subvol_is_ro(c, dir->ei_subvol) ?: + int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: __bch2_unlink(vdir, dentry, false); + return bch2_err_class(ret); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -523,7 +524,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, return 0; err: iput(&inode->v); - return ret; + return bch2_err_class(ret); } static int bch2_mkdir(struct mnt_idmap *idmap, @@ -641,7 +642,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, src_inode, dst_inode); - return ret; + return bch2_err_class(ret); } static void bch2_setattr_copy(struct mnt_idmap *idmap, -- GitLab From 7dcfb87af9731d051decb9f99ec3cc35cf684865 Mon Sep 17 00:00:00 2001 From: Su Yue Date: Fri, 9 Feb 2024 21:41:31 +0800 Subject: [PATCH 0924/1405] bcachefs: fix kmemleak in __bch2_read_super error handling path During xfstest tests, there are some kmemleak reports e.g. generic/051 with if USE_KMEMLEAK=yes: ==================================================================== EXPERIMENTAL kmemleak reported some memory leaks! Due to the way kmemleak works, the leak might be from an earlier test, or something totally unrelated. unreferenced object 0xffff9ef905aaf778 (size 8): comm "mount.bcachefs", pid 169844, jiffies 4295281209 (age 87.040s) hex dump (first 8 bytes): a5 cc cc cc cc cc cc cc ........ backtrace: [] __kmem_cache_alloc_node+0x1f3/0x2c0 [] kmalloc_trace+0x26/0xb0 [] __bch2_read_super+0xfe/0x4e0 [bcachefs] [] bch2_fs_open+0x262/0x1710 [bcachefs] [] bch2_mount+0x4c4/0x640 [bcachefs] [] legacy_get_tree+0x30/0x60 [] vfs_get_tree+0x28/0xf0 [] path_mount+0x475/0xb60 [] __x64_sys_mount+0x105/0x140 [] do_syscall_64+0x42/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 unreferenced object 0xffff9ef96cdc4fc0 (size 32): comm "mount.bcachefs", pid 169844, jiffies 4295281209 (age 87.040s) hex dump (first 32 bytes): 2f 64 65 76 2f 6d 61 70 70 65 72 2f 74 65 73 74 /dev/mapper/test 2d 31 00 cc cc cc cc cc cc cc cc cc cc cc cc cc -1.............. backtrace: [] __kmem_cache_alloc_node+0x1f3/0x2c0 [] __kmalloc_node_track_caller+0x51/0x150 [] kstrdup+0x32/0x60 [] __bch2_read_super+0x11a/0x4e0 [bcachefs] [] bch2_fs_open+0x262/0x1710 [bcachefs] [] bch2_mount+0x4c4/0x640 [bcachefs] [] legacy_get_tree+0x30/0x60 [] vfs_get_tree+0x28/0xf0 [] path_mount+0x475/0xb60 [] __x64_sys_mount+0x105/0x140 [] do_syscall_64+0x42/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ==================================================================== The leak happens if bdev_open_by_path() failed to open a block device then it goes label 'out' directly without call of bch2_free_super(). Fix it by going to label 'err' instead of 'out' if bdev_open_by_path() fails. Signed-off-by: Su Yue Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d60c7d27a0477..36988add581fb 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -717,7 +717,7 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, if (IS_ERR(sb->bdev_handle)) { ret = PTR_ERR(sb->bdev_handle); - goto out; + goto err; } sb->bdev = sb->bdev_handle->bdev; -- GitLab From 04eb57930e4e471d8f16455a748df2009d43c139 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Feb 2024 16:28:52 -0500 Subject: [PATCH 0925/1405] bcachefs: fix missing endiannes conversion in sb_members Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index a45354d2acde9..eff5ce18c69c0 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -421,7 +421,7 @@ void bch2_dev_errors_reset(struct bch_dev *ca) m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); - m->errors_reset_time = ktime_get_real_seconds(); + m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); mutex_unlock(&c->sb_lock); -- GitLab From 7d99a70b65951108d82e1618c67abe69c3ed7720 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 10 Feb 2024 14:43:59 +0100 Subject: [PATCH 0926/1405] ASoC: Intel: Boards: Fix NULL pointer deref in BYT/CHT boards Since commit 13f58267cda3 ("ASoC: soc.h: don't create dummy Component via COMP_DUMMY()") dummy snd_soc_dai_link.codecs entries no longer have a name set. This means that when looking for the codec dai_link the machine driver can no longer unconditionally run strcmp() on snd_soc_dai_link.codecs[0].name since this may now be NULL. Add a check for snd_soc_dai_link.codecs[0].name being NULL to all BYT/CHT machine drivers to avoid NULL pointer dereferences in their probe() methods. Fixes: 13f58267cda3 ("ASoC: soc.h: don't create dummy Component via COMP_DUMMY()") Cc: Kuninori Morimoto Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240210134400.24913-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_cx2072x.c | 3 ++- sound/soc/intel/boards/bytcht_da7213.c | 3 ++- sound/soc/intel/boards/bytcht_es8316.c | 3 ++- sound/soc/intel/boards/bytcr_rt5640.c | 3 ++- sound/soc/intel/boards/bytcr_rt5651.c | 3 ++- sound/soc/intel/boards/bytcr_wm5102.c | 3 ++- sound/soc/intel/boards/cht_bsw_rt5645.c | 3 ++- sound/soc/intel/boards/cht_bsw_rt5672.c | 3 ++- 8 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c index 10a84a2c1036e..c014d85a08b24 100644 --- a/sound/soc/intel/boards/bytcht_cx2072x.c +++ b/sound/soc/intel/boards/bytcht_cx2072x.c @@ -241,7 +241,8 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_cht_cx2072x_dais); i++) { - if (!strcmp(byt_cht_cx2072x_dais[i].codecs->name, + if (byt_cht_cx2072x_dais[i].codecs->name && + !strcmp(byt_cht_cx2072x_dais[i].codecs->name, "i2c-14F10720:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index 7e5eea690023d..f4ac3ddd148b8 100644 --- a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -245,7 +245,8 @@ static int bytcht_da7213_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(dailink); i++) { - if (!strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) { + if (dailink[i].codecs->name && + !strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) { dai_index = i; break; } diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 1564a88a885ef..2fcec2e02bb53 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -546,7 +546,8 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); i++) { - if (!strcmp(byt_cht_es8316_dais[i].codecs->name, + if (byt_cht_es8316_dais[i].codecs->name && + !strcmp(byt_cht_es8316_dais[i].codecs->name, "i2c-ESSX8316:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 42466b4b1ca45..03be5e26ec4ab 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1652,7 +1652,8 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_rt5640_dais); i++) { - if (!strcmp(byt_rt5640_dais[i].codecs->name, + if (byt_rt5640_dais[i].codecs->name && + !strcmp(byt_rt5640_dais[i].codecs->name, "i2c-10EC5640:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index f9fe8414f454f..80c841b000a31 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -910,7 +910,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_rt5651_dais); i++) { - if (!strcmp(byt_rt5651_dais[i].codecs->name, + if (byt_rt5651_dais[i].codecs->name && + !strcmp(byt_rt5651_dais[i].codecs->name, "i2c-10EC5651:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 6978ebde66935..cccb5e90c0fef 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -605,7 +605,8 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) /* find index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_wm5102_dais); i++) { - if (!strcmp(byt_wm5102_dais[i].codecs->name, + if (byt_wm5102_dais[i].codecs->name && + !strcmp(byt_wm5102_dais[i].codecs->name, "wm5102-codec")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index c952a96cde7eb..7773f61064f4d 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -571,7 +571,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev) /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) - if (!strcmp(card->dai_link[i].codecs->name, + if (card->dai_link[i].codecs->name && + !strcmp(card->dai_link[i].codecs->name, "i2c-10EC5645:00")) { card->dai_link[i].codecs->name = drv->codec_name; dai_index = i; diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index 8cf0b33cc02eb..be2d1a8dbca80 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -466,7 +466,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev) /* find index of codec dai */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) { - if (!strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) { + if (cht_dailink[i].codecs->name && + !strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) { dai_index = i; break; } -- GitLab From 930375d34de67e129566caca008de0bbc54a4646 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 10 Feb 2024 14:44:00 +0100 Subject: [PATCH 0927/1405] ASoC: Intel: cht_bsw_rt5645: Cleanup codec_name handling 4 fixes / cleanups to the rt5645 mc driver's codec_name handling: 1. In the for loop looking for the dai_index for the codec, replace card->dai_link[i] with cht_dailink[i]. The for loop already uses ARRAY_SIZE(cht_dailink) as bound and card->dai_link is just a pointer to cht_dailink using card->dai_link only obfuscates that cht_dailink is being modified directly rather then say a copy of cht_dailink. Using cht_dailink[i] also makes the code consistent with other machine drivers. 2. Don't set cht_dailink[dai_index].codecs->name in the for loop, this immediately gets overridden using acpi_dev_name(adev) directly below the loop. 3. Add a missing break to the loop. 4. Remove the now no longer used (only set, never read) codec_name field from struct cht_mc_private. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240210134400.24913-3-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/cht_bsw_rt5645.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 7773f61064f4d..eb41b7115d01d 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -40,7 +40,6 @@ struct cht_acpi_card { struct cht_mc_private { struct snd_soc_jack jack; struct cht_acpi_card *acpi_card; - char codec_name[SND_ACPI_I2C_ID_LEN]; struct clk *mclk; }; @@ -567,15 +566,14 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } card->dev = &pdev->dev; - sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id); /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) - if (card->dai_link[i].codecs->name && - !strcmp(card->dai_link[i].codecs->name, + if (cht_dailink[i].codecs->name && + !strcmp(cht_dailink[i].codecs->name, "i2c-10EC5645:00")) { - card->dai_link[i].codecs->name = drv->codec_name; dai_index = i; + break; } /* fixup codec name based on HID */ -- GitLab From 6f98e44984d5eff599906c0376c027df985bf38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 10 Feb 2024 17:40:06 +0100 Subject: [PATCH 0928/1405] spi: ppc4xx: Fix fallout from include cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver uses several symbols declared in , e.g module_platform_driver(). Include this header explicitly now that doesn't include any more. Fixes: ef175b29a242 ("of: Stop circularly including of_device.h and of_platform.h") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240210164006.208149-6-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 412d6e6782246..2290b40459ef5 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include -- GitLab From de4af897ddf242aea18ee90d3ad2e21b4e64359c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 10 Feb 2024 17:40:07 +0100 Subject: [PATCH 0929/1405] spi: ppc4xx: Fix fallout from rename in struct spi_bitbang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I failed to adapt this driver because it's not enabled in a powerpc allmodconfig build and also wasn't hit by my grep expertise. Fix accordingly. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402100815.XQXw9XCF-lkp@intel.com/ Fixes: 2259233110d9 ("spi: bitbang: Follow renaming of SPI "master" to "controller"") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240210164006.208149-7-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 2290b40459ef5..2d5536efa9f81 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -364,22 +364,22 @@ static int spi_ppc4xx_of_probe(struct platform_device *op) /* Setup the state for the bitbang driver */ bbp = &hw->bitbang; - bbp->master = hw->host; + bbp->ctlr = hw->host; bbp->setup_transfer = spi_ppc4xx_setupxfer; bbp->txrx_bufs = spi_ppc4xx_txrx; bbp->use_dma = 0; - bbp->master->setup = spi_ppc4xx_setup; - bbp->master->cleanup = spi_ppc4xx_cleanup; - bbp->master->bits_per_word_mask = SPI_BPW_MASK(8); - bbp->master->use_gpio_descriptors = true; + bbp->ctlr->setup = spi_ppc4xx_setup; + bbp->ctlr->cleanup = spi_ppc4xx_cleanup; + bbp->ctlr->bits_per_word_mask = SPI_BPW_MASK(8); + bbp->ctlr->use_gpio_descriptors = true; /* * The SPI core will count the number of GPIO descriptors to figure * out the number of chip selects available on the platform. */ - bbp->master->num_chipselect = 0; + bbp->ctlr->num_chipselect = 0; /* the spi->mode bits understood by this driver: */ - bbp->master->mode_bits = + bbp->ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST; /* Get the clock for the OPB */ -- GitLab From b3aa619a8b4706f35cb62f780c14e68796b37f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 10 Feb 2024 17:40:08 +0100 Subject: [PATCH 0930/1405] spi: ppc4xx: Drop write-only variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") the bits_per_word variable is only written to. The check that was there before isn't needed any more as the spi core ensures that only 8 bit transfers are used, so the variable can go away together with all assignments to it. Fixes: 24778be20f87 ("spi: convert drivers to use bits_per_word_mask") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240210164006.208149-8-u.kleine-koenig@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 2d5536efa9f81..942c3117ab3a9 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -168,10 +168,8 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) int scr; u8 cdm = 0; u32 speed; - u8 bits_per_word; /* Start with the generic configuration for this device. */ - bits_per_word = spi->bits_per_word; speed = spi->max_speed_hz; /* @@ -179,9 +177,6 @@ static int spi_ppc4xx_setupxfer(struct spi_device *spi, struct spi_transfer *t) * the transfer to overwrite the generic configuration with zeros. */ if (t) { - if (t->bits_per_word) - bits_per_word = t->bits_per_word; - if (t->speed_hz) speed = min(t->speed_hz, spi->max_speed_hz); } -- GitLab From 6ef5d5b92f7117b324efaac72b3db27ae8bb3082 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 11 Feb 2024 12:58:34 +0300 Subject: [PATCH 0931/1405] ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work() There is a path in rt5645_jack_detect_work(), where rt5645->jd_mutex is left locked forever. That may lead to deadlock when rt5645_jack_detect_work() is called for the second time. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cdba4301adda ("ASoC: rt5650: add mutex to avoid the jack detection failure") Signed-off-by: Alexey Khoroshilov Link: https://lore.kernel.org/r/1707645514-21196-1-git-send-email-khoroshilov@ispras.ru Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 5150d6ee37481..0cc2fa131d483 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3317,6 +3317,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); + mutex_unlock(&rt5645->jd_mutex); return; case 4: val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020; -- GitLab From c4b603c6e2df3a17831731d8bbec5c16fa7bbdf8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 9 Feb 2024 01:43:14 +0200 Subject: [PATCH 0932/1405] ASoC: SOF: amd: Fix locking in ACP IRQ handler A recent change in acp_irq_thread() was meant to address a potential race condition while trying to acquire the hardware semaphore responsible for the synchronization between firmware and host IPC interrupts. This resulted in an improper use of the IPC spinlock, causing normal kernel memory allocations (which may sleep) inside atomic contexts: 1707255557.133976 kernel: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:315 ... 1707255557.134757 kernel: sof_ipc3_rx_msg+0x70/0x130 [snd_sof] 1707255557.134793 kernel: acp_sof_ipc_irq_thread+0x1e0/0x550 [snd_sof_amd_acp] 1707255557.134855 kernel: acp_irq_thread+0xa3/0x130 [snd_sof_amd_acp] 1707255557.134904 kernel: ? irq_thread+0xb5/0x1e0 1707255557.134947 kernel: ? __pfx_irq_thread_fn+0x10/0x10 1707255557.134985 kernel: irq_thread_fn+0x23/0x60 Moreover, there are attempts to lock a mutex from the same atomic context: 1707255557.136357 kernel: ============================= 1707255557.136393 kernel: [ BUG: Invalid wait context ] 1707255557.136413 kernel: 6.8.0-rc3-next-20240206-audio-next #9 Tainted: G W 1707255557.136432 kernel: ----------------------------- 1707255557.136451 kernel: irq/66-AudioDSP/502 is trying to lock: 1707255557.136470 kernel: ffff965152f26af8 (&sb->s_type->i_mutex_key#2){+.+.}-{3:3}, at: start_creating.part.0+0x5f/0x180 ... 1707255557.137429 kernel: start_creating.part.0+0x5f/0x180 1707255557.137457 kernel: __debugfs_create_file+0x61/0x210 1707255557.137475 kernel: snd_sof_debugfs_io_item+0x75/0xc0 [snd_sof] 1707255557.137494 kernel: sof_ipc3_do_rx_work+0x7cf/0x9f0 [snd_sof] 1707255557.137513 kernel: sof_ipc3_rx_msg+0xb3/0x130 [snd_sof] 1707255557.137532 kernel: acp_sof_ipc_irq_thread+0x1e0/0x550 [snd_sof_amd_acp] 1707255557.137551 kernel: acp_irq_thread+0xa3/0x130 [snd_sof_amd_acp] Fix the issues by reducing the lock scope in acp_irq_thread(), so that it guards only the hardware semaphore acquiring attempt. Additionally, restore the initial locking in acp_sof_ipc_irq_thread() to synchronize the handling of immediate replies from DSP core. Fixes: 802134c8c2c8 ("ASoC: SOF: amd: Refactor spinlock_irq(&sdev->ipc_lock) sequence in irq_handler") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20240208234315.2182048-1-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown --- sound/soc/sof/amd/acp-ipc.c | 2 ++ sound/soc/sof/amd/acp.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c index 2743f07a5e081..b44b1b1adb6ed 100644 --- a/sound/soc/sof/amd/acp-ipc.c +++ b/sound/soc/sof/amd/acp-ipc.c @@ -188,11 +188,13 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write); if (dsp_ack) { + spin_lock_irq(&sdev->ipc_lock); /* handle immediate reply from DSP core */ acp_dsp_ipc_get_reply(sdev); snd_sof_ipc_reply(sdev, 0); /* set the done bit */ acp_dsp_ipc_dsp_done(sdev); + spin_unlock_irq(&sdev->ipc_lock); ipc_irq = true; } diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 32a741fcb84ff..07632ae6ccf5e 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -355,21 +355,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context) unsigned int count = ACP_HW_SEM_RETRY_COUNT; spin_lock_irq(&sdev->ipc_lock); - while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) { - /* Wait until acquired HW Semaphore lock or timeout */ - count--; - if (!count) { - dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); - spin_unlock_irq(&sdev->ipc_lock); - return IRQ_NONE; - } + /* Wait until acquired HW Semaphore lock or timeout */ + while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset) && --count) + ; + spin_unlock_irq(&sdev->ipc_lock); + + if (!count) { + dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); + return IRQ_NONE; } sof_ops(sdev)->irq_thread(irq, sdev); /* Unlock or Release HW Semaphore */ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0); - spin_unlock_irq(&sdev->ipc_lock); return IRQ_HANDLED; }; -- GitLab From 841c35169323cd833294798e58b9bf63fa4fa1de Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Feb 2024 12:18:13 -0800 Subject: [PATCH 0933/1405] Linux 6.8-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a171eafce2a3b..7e0b2ad98905b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- GitLab From cffe487026be13eaf37ea28b783d9638ab147204 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Feb 2024 13:18:46 +0300 Subject: [PATCH 0934/1405] cifs: fix underflow in parse_server_interfaces() In this loop, we step through the buffer and after each item we check if the size_left is greater than the minimum size we need. However, the problem is that "bytes_left" is type ssize_t while sizeof() is type size_t. That means that because of type promotion, the comparison is done as an unsigned and if we have negative bytes left the loop continues instead of ending. Fixes: fe856be475f7 ("CIFS: parse and store info on iface queries") Signed-off-by: Dan Carpenter Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 83c898afc8354..755f1c66b573a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; -- GitLab From 79520587fe42cd4988aff8695d60621e689109cb Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Feb 2024 11:25:42 +0000 Subject: [PATCH 0935/1405] cifs: update the same create_guid on replay File open requests made to the server contain a CreateGuid, which is used by the server to identify the open request. If the same request needs to be replayed, it needs to be sent with the same CreateGuid in the durable handle v2 context. Without doing so, we could end up leaking handles on the server when: 1. multichannel is used AND 2. connection goes down, but not for all channels This is because the replayed open request would have a new CreateGuid and the server will treat this as a new request and open a new handle. This change fixes this by reusing the existing create_guid stored in the cached fid struct. REF: MS-SMB2 4.9 Replay Create Request on an Alternate Channel Fixes: 4f1fffa23769 ("cifs: commands that are retried should have replay flag set") Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 1 + fs/smb/client/cifsglob.h | 1 + fs/smb/client/smb2ops.c | 4 ++++ fs/smb/client/smb2pdu.c | 10 ++++++++-- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 1daeb5714faad..3de5047a7ff98 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -242,6 +242,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .fid = pfid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index c86a72c9d9ecd..53c75cfb33ab9 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1378,6 +1378,7 @@ struct cifs_open_parms { struct cifs_fid *fid; umode_t mode; bool reconnect:1; + bool replay:1; /* indicates that this open is for a replay */ }; struct cifs_fid { diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 755f1c66b573a..6b3c384ead0d8 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1204,6 +1204,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = &fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, @@ -1569,6 +1570,7 @@ smb2_ioctl_query_info(const unsigned int xid, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, create_options), .fid = &fid, + .replay = !!(retries), }; if (qi.flags & PASSTHRU_FSCTL) { @@ -2295,6 +2297,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, @@ -2681,6 +2684,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = &fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4085ce27fd388..608ee05491e26 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2404,8 +2404,13 @@ create_durable_v2_buf(struct cifs_open_parms *oparms) */ buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); - generate_random_uuid(buf->dcontext.CreateGuid); - memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + + /* for replay, we should not overwrite the existing create guid */ + if (!oparms->replay) { + generate_random_uuid(buf->dcontext.CreateGuid); + memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + } else + memcpy(buf->dcontext.CreateGuid, pfid->create_guid, 16); /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */ buf->Name[0] = 'D'; @@ -3142,6 +3147,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, /* reinitialize for possible replay */ flags = 0; server = cifs_pick_channel(ses); + oparms->replay = !!(retries); cifs_dbg(FYI, "create/open\n"); if (!ses || !server) -- GitLab From 28083ff18d3f65ecd64857f4495623135dd1f132 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Wed, 7 Feb 2024 11:24:46 +0100 Subject: [PATCH 0936/1405] accel/ivpu: Fix DevTLB errors on suspend/resume and recovery Issue IP reset before shutdown in order to complete all upstream requests to the SOC. Without this DevTLB is complaining about incomplete transactions and NPU cannot resume from suspend. This problem is only happening on recent IFWI releases. IP reset in rare corner cases can mess up PCI configuration, so save it before the reset. After this happens it is also impossible to issue PLL requests and D0->D3->D0 cycle is needed to recover the NPU. Add WP 0 request on power up, so the PUNIT is always notified about NPU reset. Use D0/D3 cycle for recovery as it can recover from failed IP reset and FLR cannot. Fixes: 3f7c0634926d ("accel/ivpu/37xx: Fix hangs related to MMIO reset") Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240207102446.3126981-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_37xx.c | 44 ++++++++++++++++++++++--------- drivers/accel/ivpu/ivpu_pm.c | 39 +++++++++++++++------------ 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 77accd029c4a7..89af1006df558 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -510,16 +510,6 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) return ret; } -static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) -{ - ivpu_boot_dpu_active_drive(vdev, false); - ivpu_boot_pwr_island_isolation_drive(vdev, true); - ivpu_boot_pwr_island_trickle_drive(vdev, false); - ivpu_boot_pwr_island_drive(vdev, false); - - return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); -} - static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) { u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); @@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) return 0; } +static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); + val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) { int ret = 0; - if (ivpu_boot_pwr_domain_disable(vdev)) { - ivpu_err(vdev, "Failed to disable power domain\n"); + if (ivpu_hw_37xx_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU\n"); ret = -EIO; } @@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) { int ret; + /* PLL requests may fail when powering down, so issue WP 0 here */ + ret = ivpu_pll_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret); + ret = ivpu_hw_37xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index f501f27ebafdf..5f73854234ba9 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -58,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; + /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ + pci_save_state(to_pci_dev(vdev->drm.dev)); + ret = ivpu_shutdown(vdev); - if (ret) { + if (ret) ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - return ret; - } + + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); return ret; } @@ -71,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev) { int ret; + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); + pci_restore_state(to_pci_dev(vdev->drm.dev)); + retry: ret = ivpu_hw_power_up(vdev); if (ret) { @@ -120,15 +126,20 @@ static void ivpu_pm_recovery_work(struct work_struct *work) ivpu_fw_log_dump(vdev); -retry: - ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { - cond_resched(); - goto retry; - } + atomic_inc(&vdev->pm->reset_counter); + atomic_set(&vdev->pm->reset_pending, 1); + down_write(&vdev->pm->reset_lock); + + ivpu_suspend(vdev); + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - if (ret && ret != -EAGAIN) - ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); @@ -200,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_suspend(vdev); ivpu_pm_prepare_warm_boot(vdev); - pci_save_state(to_pci_dev(dev)); - pci_set_power_state(to_pci_dev(dev), PCI_D3hot); - ivpu_dbg(vdev, PM, "Suspend done.\n"); return 0; @@ -216,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_dbg(vdev, PM, "Resume..\n"); - pci_set_power_state(to_pci_dev(dev), PCI_D0); - pci_restore_state(to_pci_dev(dev)); - ret = ivpu_resume(vdev); if (ret) ivpu_err(vdev, "Failed to resume: %d\n", ret); -- GitLab From bc4cbc9d260ba8358ca63662919f4bb223cb603b Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:29 +0100 Subject: [PATCH 0937/1405] tools/rtla: Fix Makefile compiler options for clang The following errors are showing up when compiling rtla with clang: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] 1 warning generated. clang -o rtla -ggdb src/osnoise.o src/osnoise_hist.o src/osnoise_top.o src/rtla.o src/timerlat_aa.o src/timerlat.o src/timerlat_hist.o src/timerlat_top.o src/timerlat_u.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) src/osnoise.o: file not recognized: file format not recognized clang: error: linker command failed with exit code 1 (use -v to see invocation) make: *** [Makefile:110: rtla] Error 1 Solve these issues by: - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang - informing the linker about -flto=auto Link: https://lore.kernel.org/linux-trace-kernel/567ac1b94effc228ce9a0225b9df7232a9b35b55.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Fixes: 1a7b22ab15eb ("tools/rtla: Build with EXTRA_{C,LD}FLAGS") Suggested-by: Donald Zickus Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 2456a399eb9ae..afd18c678ff5a 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) -- GitLab From 64dc40f7523369912d7adb22c8cb655f71610505 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:30 +0100 Subject: [PATCH 0938/1405] tools/rtla: Fix uninitialized bucket/data->bucket_size warning When compiling rtla with clang, I am getting the following warnings: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [..] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/osnoise_hist.o src/osnoise_hist.c src/osnoise_hist.c:138:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/osnoise_hist.c:149:6: note: uninitialized use occurs here 149 | if (bucket < entries) | ^~~~~~ src/osnoise_hist.c:138:2: note: remove the 'if' if its condition is always true 138 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 139 | bucket = duration / data->bucket_size; src/osnoise_hist.c:132:12: note: initialize the variable 'bucket' to silence this warning 132 | int bucket; | ^ | = 0 1 warning generated. [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/timerlat_hist.o src/timerlat_hist.c src/timerlat_hist.c:181:6: warning: variable 'bucket' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~ src/timerlat_hist.c:204:6: note: uninitialized use occurs here 204 | if (bucket < entries) | ^~~~~~ src/timerlat_hist.c:181:2: note: remove the 'if' if its condition is always true 181 | if (data->bucket_size) | ^~~~~~~~~~~~~~~~~~~~~~ 182 | bucket = latency / data->bucket_size; src/timerlat_hist.c:175:12: note: initialize the variable 'bucket' to silence this warning 175 | int bucket; | ^ | = 0 1 warning generated. This is a legit warning, but data->bucket_size is always > 0 (see timerlat_hist_parse_args()), so the if is not necessary. Remove the unneeded if (data->bucket_size) to avoid the warning. Link: https://lkml.kernel.org/r/6e1b1665cd99042ae705b3e0fc410858c4c42346.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Fixes: 829a6c0b5698 ("rtla/osnoise: Add the hist mode") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/osnoise_hist.c | 3 +-- tools/tracing/rtla/src/timerlat_hist.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 8f81fa0073648..67128171c29d7 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -135,8 +135,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, if (params->output_divisor) duration = duration / params->output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; total_duration = duration * count; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 47d3d8b53cb21..3a5b8c409e7d8 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -178,8 +178,7 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (params->output_divisor) latency = latency / params->output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; if (!context) { hist = data->hist[cpu].irq; -- GitLab From 30369084ac6e27479a347899e74f523e6ca29b89 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:31 +0100 Subject: [PATCH 0939/1405] tools/rtla: Fix clang warning about mount_point var size clang is reporting this warning: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c src/utils.c:548:66: warning: 'fscanf' may overflow; destination buffer in argument 3 has size 1024, but the corresponding specifier may require size 1025 [-Wfortify-source] 548 | while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { | ^ Increase mount_point variable size to MAX_PATH+1 to avoid the overflow. Link: https://lkml.kernel.org/r/1b46712e93a2f4153909514a36016959dcc4021c.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: a957cbc02531 ("rtla: Add -C cgroup support") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index c769d7b3842c0..b998b24edf1e0 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -536,7 +536,7 @@ int set_cpu_dma_latency(int32_t latency) */ static const int find_mount(const char *fs, char *mp, int sizeof_mp) { - char mount_point[MAX_PATH]; + char mount_point[MAX_PATH+1]; char type[100]; int found = 0; FILE *fp; -- GitLab From 084ce16df0f060efd371092a09a7ae74a536dc11 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:32 +0100 Subject: [PATCH 0940/1405] tools/rtla: Remove unused sched_getattr() function Clang is reporting: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -c -o src/utils.o src/utils.c src/utils.c:241:19: warning: unused function 'sched_getattr' [-Wunused-function] 241 | static inline int sched_getattr(pid_t pid, struct sched_attr *attr, | ^~~~~~~~~~~~~ 1 warning generated. Which is correct, so remove the unused function. Link: https://lkml.kernel.org/r/eaed7ba122c4ae88ce71277c824ef41cbf789385.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/utils.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index b998b24edf1e0..5fcd6495ff058 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -238,12 +238,6 @@ static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; -- GitLab From f9b2c87105c989a7b259c6da87673ada96dce2f8 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:33 +0100 Subject: [PATCH 0941/1405] tools/rv: Fix Makefile compiler options for clang The following errors are showing up when compiling rv with clang: $ make HOSTCC=clang CC=clang LLVM_IAS=1 [...] clang -O -g -DVERSION=\"6.8.0-rc1\" -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized $(pkg-config --cflags libtracefs) -I include -c -o src/utils.o src/utils.c clang: warning: optimization flag '-ffat-lto-objects' is not supported [-Wignored-optimization-argument] warning: unknown warning option '-Wno-maybe-uninitialized'; did you mean '-Wno-uninitialized'? [-Wunknown-warning-option] 1 warning generated. clang -o rv -ggdb src/in_kernel.o src/rv.o src/trace.o src/utils.o $(pkg-config --libs libtracefs) src/in_kernel.o: file not recognized: file format not recognized clang: error: linker command failed with exit code 1 (use -v to see invocation) make: *** [Makefile:110: rv] Error 1 Solve these issues by: - removing -ffat-lto-objects and -Wno-maybe-uninitialized if using clang - informing the linker about -flto=auto Link: https://lkml.kernel.org/r/ed94a8ddc2ca8c8ef663cfb7ae9dd196c4a66b33.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Fixes: 4bc4b131d44c ("rv: Add rv tool") Suggested-by: Donald Zickus Signed-off-by: Daniel Bristot de Oliveira --- tools/verification/rv/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile index 3d0f3888a58c6..485f8aeddbe03 100644 --- a/tools/verification/rv/Makefile +++ b/tools/verification/rv/Makefile @@ -28,10 +28,15 @@ FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized +ifeq ($(CC),clang) + FOPTS := $(filter-out -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) $(EXTRA_CFLAGS) -I include -LDFLAGS := -ggdb $(EXTRA_LDFLAGS) +LDFLAGS := -flto=auto -ggdb $(EXTRA_LDFLAGS) LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) -- GitLab From 61ec586bc0815959d3314cf7ce242529c977b357 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Tue, 6 Feb 2024 12:05:34 +0100 Subject: [PATCH 0942/1405] tools/rv: Fix curr_reactor uninitialized variable clang is reporting: $ make HOSTCC=clang CC=clang LLVM_IAS=1 clang -O -g -DVERSION=\"6.8.0-rc3\" -flto=auto -fexceptions -fstack-protector-strong -fasynchronous-unwind-tables -fstack-clash-protection -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS $(pkg-config --cflags libtracefs) -I include -c -o src/in_kernel.o src/in_kernel.c [...] src/in_kernel.c:227:6: warning: variable 'curr_reactor' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] 227 | if (!end) | ^~~~ src/in_kernel.c:242:9: note: uninitialized use occurs here 242 | return curr_reactor; | ^~~~~~~~~~~~ src/in_kernel.c:227:2: note: remove the 'if' if its condition is always false 227 | if (!end) | ^~~~~~~~~ 228 | goto out_free; | ~~~~~~~~~~~~~ src/in_kernel.c:221:6: warning: variable 'curr_reactor' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] 221 | if (!start) | ^~~~~~ src/in_kernel.c:242:9: note: uninitialized use occurs here 242 | return curr_reactor; | ^~~~~~~~~~~~ src/in_kernel.c:221:2: note: remove the 'if' if its condition is always false 221 | if (!start) | ^~~~~~~~~~~ 222 | goto out_free; | ~~~~~~~~~~~~~ src/in_kernel.c:215:20: note: initialize the variable 'curr_reactor' to silence this warning 215 | char *curr_reactor; | ^ | = NULL 2 warnings generated. Which is correct. Setting curr_reactor to NULL avoids the problem. Link: https://lkml.kernel.org/r/3a35551149e5ee0cb0950035afcb8082c3b5d05b.1707217097.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Bill Wendling Cc: Justin Stitt Cc: Donald Zickus Fixes: 6d60f89691fc ("tools/rv: Add in-kernel monitor interface") Signed-off-by: Daniel Bristot de Oliveira --- tools/verification/rv/src/in_kernel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index ad28582bcf2b1..f04479ecc96c0 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -210,9 +210,9 @@ static char *ikm_read_reactor(char *monitor_name) static char *ikm_get_current_reactor(char *monitor_name) { char *reactors = ikm_read_reactor(monitor_name); + char *curr_reactor = NULL; char *start; char *end; - char *curr_reactor; if (!reactors) return NULL; -- GitLab From 962ac2dce56bb3aad1f82a4bbe3ada57a020287c Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Mon, 5 Feb 2024 20:46:19 +0000 Subject: [PATCH 0943/1405] drm/i915/dsc: Fix the macro that calculates DSCC_/DSCA_ PPS reg address Commit bd077259d0a9 ("drm/i915/vdsc: Add function to read any PPS register") defines a new macro to calculate the DSC PPS register addresses with PPS number as an input. This macro correctly calculates the addresses till PPS 11 since the addresses increment by 4. So in that case the following macro works correctly to give correct register address: _MMIO(_DSCA_PPS_0 + (pps) * 4) However after PPS 11, the register address for PPS 12 increments by 12 because of RC Buffer memory allocation in between. Because of this discontinuity in the address space, the macro calculates wrong addresses for PPS 12 - 16 resulting into incorrect DSC PPS parameter value read/writes causing DSC corruption. This fixes it by correcting this macro to add the offset of 12 for PPS >=12. v3: Add correct paranthesis for pps argument (Jani Nikula) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10172 Fixes: bd077259d0a9 ("drm/i915/vdsc: Add function to read any PPS register") Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Animesh Manna Cc: Jani Nikula Cc: Sean Paul Cc: Drew Davenport Signed-off-by: Manasi Navare Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240205204619.1991673-1-navaremanasi@chromium.org (cherry picked from commit 6074be620c31dc2ae11af96a1a5ea95580976fb5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_vdsc_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index 64f440fdc22b2..8b21dc8e26d52 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h @@ -51,8 +51,8 @@ #define DSCC_PICTURE_PARAMETER_SET_0 _MMIO(0x6BA00) #define _DSCA_PPS_0 0x6B200 #define _DSCC_PPS_0 0x6BA00 -#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + (pps) * 4) -#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + (pps) * 4) +#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) +#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB 0x78270 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB 0x78370 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC 0x78470 -- GitLab From ad26d56d080780bbfcc1696ca0c0cce3e2124ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 8 Feb 2024 17:45:52 +0200 Subject: [PATCH 0944/1405] drm/i915/dp: Limit SST link rate to <=8.1Gbps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Limit the link rate to HBR3 or below (<=8.1Gbps) in SST mode. UHBR (10Gbps+) link rates require 128b/132b channel encoding which we have not yet hooked up into the SST/no-sideband codepaths. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240208154552.14545-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 6061811d72e14f41f71b6a025510920b187bfcca) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f5ef95da55346..ae647d03af25c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2355,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, limits->min_rate = intel_dp_common_rate(intel_dp, 0); limits->max_rate = intel_dp_max_link_rate(intel_dp); + /* FIXME 128b/132b SST support missing */ + limits->max_rate = min(limits->max_rate, 810000); + limits->min_lane_count = 1; limits->max_lane_count = intel_dp_max_lane_count(intel_dp); -- GitLab From 119ff04864a24470b1e531bb53e5c141aa8fefb0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:21 +0000 Subject: [PATCH 0945/1405] tcp: move tp->scaling_ratio to tcp_sock_read_txrx group tp->scaling_ratio is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 97d7a5c8e01c0..8039122914797 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -38,7 +38,7 @@ u32 max_window read_mostly - u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window u32 rcv_ssthresh read_mostly - __tcp_select_window -u82 scaling_ratio +u8 scaling_ratio read_mostly read_mostly tcp_win_from_space struct tcp_rack u16 advmss - read_mostly tcp_rcv_space_adjust u8 compressed_ack diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 89b290d8c8dc9..168f5dca66096 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -221,6 +221,7 @@ struct tcp_sock { u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ @@ -352,7 +353,6 @@ struct tcp_sock { u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7e2481b9eae1b..c82dc42f57c65 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4615,7 +4615,8 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); - CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32); /* RX read-mostly hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); -- GitLab From 666a877deab2bcf8fd11c962d69e687e18168a6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:22 +0000 Subject: [PATCH 0946/1405] tcp: move tp->tcp_usec_ts to tcp_sock_read_txrx group tp->tcp_usec_ts is a read mostly field, used in rx and tx fast paths. Fixes: d5fed5addb2b ("tcp: reorganize tcp_sock fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Wei Wang Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/tcp_sock.rst | 2 +- include/linux/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 8039122914797..1c154cbd18487 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -44,7 +44,7 @@ u16 advmss - read_m u8 compressed_ack u8:2 dup_ack_counter u8:1 tlp_retrans -u8:1 tcp_usec_ts +u8:1 tcp_usec_ts read_mostly read_mostly u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 168f5dca66096..a1c47a6d69b0e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,6 +224,7 @@ struct tcp_sock { u8 scaling_ratio; /* see tcp_win_from_space() */ u8 chrono_type : 2, /* current chronograph type */ repair : 1, + tcp_usec_ts : 1, /* TSval values in usec */ is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ __cacheline_group_end(tcp_sock_read_txrx); @@ -368,8 +369,7 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - tcp_usec_ts:1, /* TSval values in usec */ - unused:4; + unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ -- GitLab From c353c7b7ffb7ae6ed8f3339906fe33c8be6cf344 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Feb 2024 14:43:23 +0000 Subject: [PATCH 0947/1405] net-device: move lstats in net_device_read_txrx dev->lstats is notably used from loopback ndo_start_xmit() and other virtual drivers. Per cpu stats updates are dirtying per-cpu data, but the pointer itself is read-only. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Simon Horman Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/net_device.rst | 4 ++-- include/linux/netdevice.h | 10 +++++----- net/core/dev.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index e75a53593bb96..dceb49d56a911 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -136,8 +136,8 @@ struct_netpoll_info* npinfo - possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish void* ml_priv enum_netdev_ml_priv_type ml_priv_type -struct_pcpu_lstats__percpu* lstats -struct_pcpu_sw_netstats__percpu* tstats +struct_pcpu_lstats__percpu* lstats read_mostly dev_lstats_add() +struct_pcpu_sw_netstats__percpu* tstats read_mostly dev_sw_netstats_tx_add() struct_pcpu_dstats__percpu* dstats struct_garp_port* garp_port struct_mrp_port* mrp_port diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 118c40258d07b..ef7bfbb984973 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2141,6 +2141,11 @@ struct net_device { /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2395,11 +2400,6 @@ struct net_device { enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; diff --git a/net/core/dev.c b/net/core/dev.c index cb2dab0feee0a..9bb792cecc16f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11652,11 +11652,12 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38); /* RX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); -- GitLab From 14f08c976ffe0d2117c6199c32663df1cbc45c65 Mon Sep 17 00:00:00 2001 From: limingming3 Date: Wed, 7 Feb 2024 14:51:42 +0800 Subject: [PATCH 0948/1405] tools/rtla: Replace setting prio with nice for SCHED_OTHER Since the sched_priority for SCHED_OTHER is always 0, it makes no sence to set it. Setting nice for SCHED_OTHER seems more meaningful. Link: https://lkml.kernel.org/r/20240207065142.1753909-1-limingming3@lixiang.com Cc: stable@vger.kernel.org Fixes: b1696371d865 ("rtla: Helper functions for rtla") Signed-off-by: limingming3 Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/utils.c | 6 +++--- tools/tracing/rtla/src/utils.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 5fcd6495ff058..9ac71a66840c1 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -473,13 +473,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 04ed1e650495a..d44513e6c66a0 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -9,6 +9,8 @@ */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ -- GitLab From b5f319360371087d52070d8f3fc7789e80ce69a6 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Fri, 2 Feb 2024 19:16:07 -0500 Subject: [PATCH 0949/1405] tools/rtla: Exit with EXIT_SUCCESS when help is invoked Fix rtla so that the following commands exit with 0 when help is invoked rtla osnoise top -h rtla osnoise hist -h rtla timerlat top -h rtla timerlat hist -h Link: https://lore.kernel.org/linux-trace-devel/20240203001607.69703-1-jkacur@redhat.com Cc: stable@vger.kernel.org Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Signed-off-by: John Kacur Signed-off-by: Daniel Bristot de Oliveira --- tools/tracing/rtla/src/osnoise_hist.c | 6 +++++- tools/tracing/rtla/src/osnoise_top.c | 6 +++++- tools/tracing/rtla/src/timerlat_hist.c | 6 +++++- tools/tracing/rtla/src/timerlat_top.c | 6 +++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 67128171c29d7..01870d50942a1 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -479,7 +479,11 @@ static void osnoise_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f7c959be86777..457360db07673 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -331,7 +331,11 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 3a5b8c409e7d8..dbf154082f958 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -545,7 +545,11 @@ static void timerlat_hist_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 1640f121baca5..3e9af2c386888 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -375,7 +375,11 @@ static void timerlat_top_usage(char *usage) for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + if (usage) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); } /* -- GitLab From bdd70eb68913c960acb895b00a8c62eb64715b1f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:49 +0100 Subject: [PATCH 0950/1405] mptcp: drop the push_pending field Such field is there to avoid acquiring the data lock in a few spots, but it adds complexity to the already non trivial locking schema. All the relevant call sites (mptcp-level re-injection, set socket options), are slow-path, drop such field in favor of 'cb_flags', adding the relevant locking. This patch could be seen as an improvement, instead of a fix. But it simplifies the next patch. The 'Fixes' tag has been added to help having this series backported to stable. Fixes: e9d09baca676 ("mptcp: avoid atomic bit manipulation when possible") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 12 ++++++------ net/mptcp/protocol.h | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 028e8b473626f..2111819016af6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1505,8 +1505,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } static int __subflow_push_pending(struct sock *sk, struct sock *ssk, @@ -3142,7 +3145,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3330,8 +3332,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3347,7 +3348,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3517f2d24a226..b905f18682984 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -286,7 +286,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; -- GitLab From 013e3179dbd2bc756ce1dd90354abac62f65b739 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:50 +0100 Subject: [PATCH 0951/1405] mptcp: fix rcv space initialization mptcp_rcv_space_init() is supposed to happen under the msk socket lock, but active msk socket does that without such protection. Leverage the existing mptcp_propagate_state() helper to that extent. We need to ensure mptcp_rcv_space_init will happen before mptcp_rcv_space_adjust(), and the release_cb does not assure that: explicitly check for such condition. While at it, move the wnd_end initialization out of mptcp_rcv_space_init(), it never belonged there. Note that the race does not produce ill effect in practice, but change allows cleaning-up and defying better the locking model. Fixes: a6b118febbab ("mptcp: add receive buffer auto-tuning") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 10 ++++++---- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 4 ++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2111819016af6..7632eafb683bc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1963,6 +1963,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (copied <= 0) return; + if (!msk->rcvspace_init) + mptcp_rcv_space_init(msk, msk->first); + msk->rcvq_space.copied += copied; mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC); @@ -3160,6 +3163,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->bytes_received = 0; msk->bytes_sent = 0; msk->bytes_retrans = 0; + msk->rcvspace_init = 0; WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); @@ -3247,6 +3251,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) { const struct tcp_sock *tp = tcp_sk(ssk); + msk->rcvspace_init = 1; msk->rcvq_space.copied = 0; msk->rcvq_space.rtt_us = 0; @@ -3257,8 +3262,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) TCP_INIT_CWND * tp->advmss); if (msk->rcvq_space.space == 0) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; - - WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) @@ -3478,10 +3481,9 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->write_seq, subflow->idsn + 1); WRITE_ONCE(msk->snd_nxt, msk->write_seq); WRITE_ONCE(msk->snd_una, msk->write_seq); + WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); mptcp_pm_new_connection(msk, ssk, 0); - - mptcp_rcv_space_init(msk, ssk); } void mptcp_sock_graft(struct sock *sk, struct socket *parent) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b905f18682984..9f5ee82e34731 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -304,7 +304,8 @@ struct mptcp_sock { nodelay:1, fastopening:1, in_accept_queue:1, - free_first:1; + free_first:1, + rcvspace_init:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 0dcb721c89d19..56b2ac2f2f22d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -424,6 +424,8 @@ void __mptcp_sync_state(struct sock *sk, int state) struct mptcp_sock *msk = mptcp_sk(sk); __mptcp_propagate_sndbuf(sk, msk->first); + if (!msk->rcvspace_init) + mptcp_rcv_space_init(msk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { mptcp_set_state(sk, state); sk->sk_state_change(sk); @@ -545,7 +547,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_rcv_space_init(msk, sk); mptcp_propagate_state(parent, sk); } return; @@ -1744,7 +1745,6 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_do_fallback(sk); - mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk); -- GitLab From 3f83d8a77eeeb47011b990fd766a421ee64f1d73 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:51 +0100 Subject: [PATCH 0952/1405] mptcp: fix more tx path fields initialization The 'msk->write_seq' and 'msk->snd_nxt' are always updated under the msk socket lock, except at MPC handshake completiont time. Builds-up on the previous commit to move such init under the relevant lock. There are no known problems caused by the potential race, the primary goal is consistency. Fixes: 6d0060f600ad ("mptcp: Write MPTCP DSS headers to outgoing data packets") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 ++---- net/mptcp/subflow.c | 13 +++++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7632eafb683bc..8cb6a873dae9a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3478,10 +3478,8 @@ void mptcp_finish_connect(struct sock *ssk) * accessing the field below */ WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->write_seq, subflow->idsn + 1); - WRITE_ONCE(msk->snd_nxt, msk->write_seq); - WRITE_ONCE(msk->snd_una, msk->write_seq); - WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); mptcp_pm_new_connection(msk, ssk, 0); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 56b2ac2f2f22d..c2df34ebcf284 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -421,12 +421,21 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_sync_state(struct sock *sk, int state) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk = msk->first; - __mptcp_propagate_sndbuf(sk, msk->first); + subflow = mptcp_subflow_ctx(ssk); + __mptcp_propagate_sndbuf(sk, ssk); if (!msk->rcvspace_init) - mptcp_rcv_space_init(msk, msk->first); + mptcp_rcv_space_init(msk, ssk); + if (sk->sk_state == TCP_SYN_SENT) { + /* subflow->idsn is always available is TCP_SYN_SENT state, + * even for the FASTOPEN scenarios + */ + WRITE_ONCE(msk->write_seq, subflow->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); mptcp_set_state(sk, state); sk->sk_state_change(sk); } -- GitLab From e4a0fa47e816e186f6b4c0055d07eeec42d11871 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:52 +0100 Subject: [PATCH 0953/1405] mptcp: corner case locking for rx path fields initialization Most MPTCP-level related fields are under the mptcp data lock protection, but are written one-off without such lock at MPC complete time, both for the client and the server Leverage the mptcp_propagate_state() infrastructure to move such initialization under the proper lock client-wise. The server side critical init steps are done by mptcp_subflow_fully_established(): ensure the caller properly held the relevant lock, and avoid acquiring the same lock in the nested scopes. There are no real potential races, as write access to such fields is implicitly serialized by the MPTCP state machine; the primary goal is consistency. Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/fastopen.c | 6 ++--- net/mptcp/options.c | 9 +++---- net/mptcp/protocol.c | 9 ++++--- net/mptcp/protocol.h | 9 +++---- net/mptcp/subflow.c | 56 +++++++++++++++++++++++++------------------- 5 files changed, 50 insertions(+), 39 deletions(-) diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 74698582a2859..ad28da655f8bc 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { struct sock *sk = (struct sock *)msk; struct sk_buff *skb; - mptcp_data_lock(sk); skb = skb_peek_tail(&sk->sk_receive_queue); if (skb) { WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); @@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_ } pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); - mptcp_data_unlock(sk); } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d2527d189a799..e3e96a49f9229 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ - subflow->fully_established = 1; - WRITE_ONCE(msk->fully_established, true); - goto check_notify; + goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data @@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); - mptcp_subflow_fully_established(subflow, mp_opt); + + mptcp_data_lock((struct sock *)msk); + __mptcp_subflow_fully_established(msk, subflow, mp_opt); + mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8cb6a873dae9a..8ef2927ebca29 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3186,6 +3186,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk; if (!nsk) @@ -3226,7 +3227,8 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); - list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + subflow = mptcp_subflow_ctx(ssk); + list_add(&subflow->node, &msk->conn_list); sock_hold(ssk); /* new mpc subflow takes ownership of the newly @@ -3241,6 +3243,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_propagate_sndbuf(nsk, ssk); mptcp_rcv_space_init(msk, ssk); + + if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) + __mptcp_subflow_fully_established(msk, subflow, mp_opt); bh_unlock_sock(nsk); /* note: the newly allocated socket refcount is 2 now */ @@ -3478,8 +3483,6 @@ void mptcp_finish_connect(struct sock *ssk) * accessing the field below */ WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->snd_una, subflow->idsn + 1); - WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); mptcp_pm_new_connection(msk, ssk, 0); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 9f5ee82e34731..fefcbf5854119 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); @@ -952,8 +953,8 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c2df34ebcf284..c34ecadee1200 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -441,20 +441,6 @@ void __mptcp_sync_state(struct sock *sk, int state) } } -static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) { - __mptcp_sync_state(sk, ssk->sk_state); - } else { - msk->pending_state = ssk->sk_state; - __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); - } - mptcp_data_unlock(sk); -} - static void subflow_set_remote_key(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) @@ -476,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk, atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); } +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_data_lock(sk); + if (mp_opt) { + /* Options are available only in the non fallback cases + * avoid updating rx path fields otherwise + */ + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); + subflow_set_remote_key(msk, subflow, mp_opt); + } + + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; + __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -510,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (mp_opt.deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; - subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -556,7 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } return; @@ -741,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk) kfree_rcu(ctx, rcu); } -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - subflow_set_remote_key(msk, subflow, mp_opt); subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) - mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); + __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -844,7 +853,6 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * mpc option */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { - mptcp_subflow_fully_established(ctx, &mp_opt); mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } @@ -1756,7 +1764,7 @@ static void subflow_state_change(struct sock *sk) mptcp_do_fallback(sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } /* as recvmsg() does not acquire the subflow socket for ssk selection -- GitLab From f012d796a6de662692159c539689e47e662853a8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 8 Feb 2024 19:03:53 +0100 Subject: [PATCH 0954/1405] mptcp: check addrs list in userspace_pm_get_local_id Before adding a new entry in mptcp_userspace_pm_get_local_id(), it's better to check whether this address is already in userspace pm local address list. If it's in the list, no need to add a new entry, just return it's address ID and use this address. Fixes: 8b20137012d9 ("mptcp: read attributes of addr entries managed by userspace PMs") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index efecbe3cf4153..4f3901d5b8ef8 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -130,10 +130,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; -- GitLab From 337cebbd850f94147cee05252778f8f78b8c337f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2024 19:03:54 +0100 Subject: [PATCH 0955/1405] mptcp: really cope with fastopen race Fastopen and PM-trigger subflow shutdown can race, as reported by syzkaller. In my first attempt to close such race, I missed the fact that the subflow status can change again before the subflow_state_change callback is invoked. Address the issue additionally copying with all the states directly reachable from TCP_FIN_WAIT1. Fixes: 1e777f39b4d7 ("mptcp: add MSG_FASTOPEN sendmsg flag support") Fixes: 4fd19a307016 ("mptcp: fix inconsistent state on fastopen race") Cc: stable@vger.kernel.org Reported-by: syzbot+c53d4d3ddb327e80bc51@syzkaller.appspotmail.com Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/458 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/protocol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index fefcbf5854119..ed50f2015dc38 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1129,7 +1129,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && + return (1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) && is_active_ssk(subflow) && !subflow->conn_finished; } -- GitLab From 68990d006d42b6ef7910fa263f87e9e0d812113b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 8 Feb 2024 19:03:55 +0100 Subject: [PATCH 0956/1405] MAINTAINERS: update Geliang's email address Update my email-address in MAINTAINERS and .mailmap entries to my kernel.org account. Suggested-by: Mat Martineau Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- .mailmap | 9 +++++---- MAINTAINERS | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.mailmap b/.mailmap index 04998f7bda818..327e7eddd1461 100644 --- a/.mailmap +++ b/.mailmap @@ -191,10 +191,11 @@ Gao Xiang Gao Xiang Gao Xiang Gao Xiang -Geliang Tang -Geliang Tang -Geliang Tang -Geliang Tang +Geliang Tang +Geliang Tang +Geliang Tang +Geliang Tang +Geliang Tang Georgi Djakov Gerald Schaefer Gerald Schaefer diff --git a/MAINTAINERS b/MAINTAINERS index 3dfe7ea25320c..aa97760311217 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15324,7 +15324,7 @@ K: \bmdo_ NETWORKING [MPTCP] M: Matthieu Baerts M: Mat Martineau -R: Geliang Tang +R: Geliang Tang L: netdev@vger.kernel.org L: mptcp@lists.linux.dev S: Maintained -- GitLab From 9a0c32d698c1d0c4a6f5642ac017da31febad1eb Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 2 Feb 2024 01:05:50 +0100 Subject: [PATCH 0957/1405] drm/nouveau: don't fini scheduler if not initialized nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call their corresponding *_fini() counterpart. This can lead to nouveau_sched_fini() being called without struct nouveau_sched ever being initialized in the first place. Instead of embedding struct nouveau_sched into struct nouveau_cli and struct nouveau_chan_abi16, allocate struct nouveau_sched separately, such that we can check for the corresponding pointer to be NULL in the particular *_fini() functions. It makes sense to allocate struct nouveau_sched separately anyway, since in a subsequent commit we can also avoid to allocate a struct nouveau_sched in nouveau_abi16_ioctl_channel_alloc() at all, if the VM_BIND uAPI has been disabled due to the legacy uAPI being used. Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity relationship") Reported-by: Timur Tabi Tested-by: Timur Tabi Closes: https://lore.kernel.org/nouveau/20240131213917.1545604-1-ttabi@nvidia.com/ Reviewed-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240202000606.3526-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 10 ++++--- drivers/gpu/drm/nouveau/nouveau_abi16.h | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +++-- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +- drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 38 +++++++++++++++++++++++-- drivers/gpu/drm/nouveau/nouveau_sched.h | 6 ++-- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- 8 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index a04156ca8390b..ca4b5ab3e59e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, struct nouveau_abi16_ntfy *ntfy, *temp; /* Cancel all jobs from the entity's queue. */ - drm_sched_entity_fini(&chan->sched.entity); + if (chan->sched) + drm_sched_entity_fini(&chan->sched->entity); if (chan->chan) nouveau_channel_idle(chan->chan); - nouveau_sched_fini(&chan->sched); + if (chan->sched) + nouveau_sched_destroy(&chan->sched); /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { @@ -337,8 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, - chan->chan->dma.ib_max); + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 1f5e243c0c759..11c8c4a80079b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched sched; + struct nouveau_sched *sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 6f6c31a9937b2..a947e1d5f309a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_sched_fini(&cli->sched); + if (cli->sched) + nouveau_sched_destroy(&cli->sched); if (uvmm) nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); @@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; /* Don't pass in the (shared) sched_wq in order to let - * nouveau_sched_init() create a dedicated one for VM_BIND jobs. + * nouveau_sched_create() create a dedicated one for VM_BIND jobs. * * This is required to ensure that for VM_BIND jobs free_job() work and * run_job() work can always run concurrently and hence, free_job() work @@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, * locks which indirectly or directly are held for allocations * elsewhere. */ - ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); + ret = nouveau_sched_create(&cli->sched, drm, NULL, 1); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8a6d94c8b1631..e239c6bf4afa4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -98,7 +98,7 @@ struct nouveau_cli { bool disabled; } uvmm; - struct nouveau_sched sched; + struct nouveau_sched *sched; const struct nvif_mclass *mem; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index bc5d71b79ab20..e65c0ef23bc73 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched = &chan16->sched; + args.sched = chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index dd98f6910f9ca..32fa2e273965b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = { .free_job = nouveau_sched_free_job, }; -int +static int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, struct workqueue_struct *wq, u32 credit_limit) { @@ -453,7 +453,30 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, return ret; } -void +int +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) +{ + struct nouveau_sched *sched; + int ret; + + sched = kzalloc(sizeof(*sched), GFP_KERNEL); + if (!sched) + return -ENOMEM; + + ret = nouveau_sched_init(sched, drm, wq, credit_limit); + if (ret) { + kfree(sched); + return ret; + } + + *psched = sched; + + return 0; +} + + +static void nouveau_sched_fini(struct nouveau_sched *sched) { struct drm_gpu_scheduler *drm_sched = &sched->base; @@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched) if (sched->wq) destroy_workqueue(sched->wq); } + +void +nouveau_sched_destroy(struct nouveau_sched **psched) +{ + struct nouveau_sched *sched = *psched; + + nouveau_sched_fini(sched); + kfree(sched); + + *psched = NULL; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index a6528f5981e6a..e1f01a23e6f6e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -111,8 +111,8 @@ struct nouveau_sched { } job; }; -int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, - struct workqueue_struct *wq, u32 credit_limit); -void nouveau_sched_fini(struct nouveau_sched *sched); +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_destroy(struct nouveau_sched **psched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 4f223c972c6a8..0a0a11dc9ec03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched = &cli->sched; + args.sched = cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); -- GitLab From a1d8700d906444167899e5a3c64a11ba50c0badd Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 2 Feb 2024 01:05:51 +0100 Subject: [PATCH 0958/1405] drm/nouveau: omit to create schedulers using the legacy uAPI Omit to create scheduler instances when using the legacy uAPI. When using the legacy NOUVEAU_GEM_PUSHBUF ioctl no scheduler instance is required, hence omit creating scheduler instances in nouveau_abi16_ioctl_channel_alloc(). Tested-by: Timur Tabi Reviewed-by: Dave Airlie Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240202000606.3526-2-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ca4b5ab3e59e6..d1bb8151a1df5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -339,10 +339,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, - chan->chan->dma.ib_max); - if (ret) - goto done; + /* If we're not using the VM_BIND uAPI, we don't need a scheduler. + * + * The client lock is already acquired by nouveau_abi16_get(). + */ + if (nouveau_cli_uvmm(cli)) { + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); + if (ret) + goto done; + } init->channel = chan->chan->chid; -- GitLab From 5ba4e6d5863c53e937f49932dee0ecb004c65928 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 8 Feb 2024 17:36:28 -0500 Subject: [PATCH 0959/1405] RDMA/qedr: Fix qedr_create_user_qp error flow Avoid the following warning by making sure to free the allocated resources in case that qedr_init_user_queue() fail. -----------[ cut here ]----------- WARNING: CPU: 0 PID: 143192 at drivers/infiniband/core/rdma_core.c:874 uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Modules linked in: tls target_core_user uio target_core_pscsi target_core_file target_core_iblock ib_srpt ib_srp scsi_transport_srp nfsd nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs 8021q garp mrp stp llc ext4 mbcache jbd2 opa_vnic ib_umad ib_ipoib sunrpc rdma_ucm ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm hfi1 intel_rapl_msr intel_rapl_common mgag200 qedr sb_edac drm_shmem_helper rdmavt x86_pkg_temp_thermal drm_kms_helper intel_powerclamp ib_uverbs coretemp i2c_algo_bit kvm_intel dell_wmi_descriptor ipmi_ssif sparse_keymap kvm ib_core rfkill syscopyarea sysfillrect video sysimgblt irqbypass ipmi_si ipmi_devintf fb_sys_fops rapl iTCO_wdt mxm_wmi iTCO_vendor_support intel_cstate pcspkr dcdbas intel_uncore ipmi_msghandler lpc_ich acpi_power_meter mei_me mei fuse drm xfs libcrc32c qede sd_mod ahci libahci t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel qed libata tg3 ghash_clmulni_intel megaraid_sas crc8 wmi [last unloaded: ib_srpt] CPU: 0 PID: 143192 Comm: fi_rdm_tagged_p Kdump: loaded Not tainted 5.14.0-408.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.14.0 01/25/2022 RIP: 0010:uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Code: 5d 41 5c 41 5d 41 5e e9 0f 26 1b dd 48 89 df e8 67 6a ff ff 49 8b 86 10 01 00 00 48 85 c0 74 9c 4c 89 e7 e8 83 c0 cb dd eb 92 <0f> 0b eb be 0f 0b be 04 00 00 00 48 89 df e8 8e f5 ff ff e9 6d ff RSP: 0018:ffffb7c6cadfbc60 EFLAGS: 00010286 RAX: ffff8f0889ee3f60 RBX: ffff8f088c1a5200 RCX: 00000000802a0016 RDX: 00000000802a0017 RSI: 0000000000000001 RDI: ffff8f0880042600 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8f11fffd5000 R11: 0000000000039000 R12: ffff8f0d5b36cd80 R13: ffff8f088c1a5250 R14: ffff8f1206d91000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8f11d7c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000147069200e20 CR3: 00000001c7210002 CR4: 00000000001706f0 Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? ib_uverbs_close+0x1f/0xb0 [ib_uverbs] ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? __warn+0x81/0x110 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? report_bug+0x10a/0x140 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ib_uverbs_close+0x1f/0xb0 [ib_uverbs] __fput+0x94/0x250 task_work_run+0x5c/0x90 do_exit+0x270/0x4a0 do_group_exit+0x2d/0x90 get_signal+0x87c/0x8c0 arch_do_signal_or_restart+0x25/0x100 ? ib_uverbs_ioctl+0xc2/0x110 [ib_uverbs] exit_to_user_mode_loop+0x9c/0x130 exit_to_user_mode_prepare+0xb6/0x100 syscall_exit_to_user_mode+0x12/0x40 do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? do_syscall_64+0x69/0x90 ? common_interrupt+0x43/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x1470abe3ec6b Code: Unable to access opcode bytes at RIP 0x1470abe3ec41. RSP: 002b:00007fff13ce9108 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: fffffffffffffffc RBX: 00007fff13ce9218 RCX: 00001470abe3ec6b RDX: 00007fff13ce9200 RSI: 00000000c0181b01 RDI: 0000000000000004 RBP: 00007fff13ce91e0 R08: 0000558d9655da10 R09: 0000558d9655dd00 R10: 00007fff13ce95c0 R11: 0000000000000246 R12: 00007fff13ce9358 R13: 0000000000000013 R14: 0000558d9655db50 R15: 00007fff13ce9470 --[ end trace 888a9b92e04c5c97 ]-- Fixes: df15856132bc ("RDMA/qedr: restructure functions that create/destroy QPs") Signed-off-by: Kamal Heib Link: https://lore.kernel.org/r/20240208223628.2040841-1-kheib@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7887a6786ed43..f118ce0a9a617 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); -- GitLab From 5b3fbd61b9d1f4ed2db95aaf03f9adae0373784d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 9 Feb 2024 01:55:18 -0800 Subject: [PATCH 0960/1405] net: sysfs: Fix /sys/class/net/ path for statistics The Documentation/ABI/testing/sysfs-class-net-statistics documentation is pointing to the wrong path for the interface. Documentation is pointing to /sys/class/, instead of /sys/class/net/. Fix it by adding the `net/` directory before the interface. Fixes: 6044f9700645 ("net: sysfs: document /sys/class/net/statistics/*") Signed-off-by: Breno Leitao Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../ABI/testing/sysfs-class-net-statistics | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics index 55db27815361b..53e508c6936a5 100644 --- a/Documentation/ABI/testing/sysfs-class-net-statistics +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -1,4 +1,4 @@ -What: /sys/class//statistics/collisions +What: /sys/class/net//statistics/collisions Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -6,7 +6,7 @@ Description: Indicates the number of collisions seen by this network device. This value might not be relevant with all MAC layers. -What: /sys/class//statistics/multicast +What: /sys/class/net//statistics/multicast Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -14,7 +14,7 @@ Description: Indicates the number of multicast packets received by this network device. -What: /sys/class//statistics/rx_bytes +What: /sys/class/net//statistics/rx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -23,7 +23,7 @@ Description: See the network driver for the exact meaning of when this value is incremented. -What: /sys/class//statistics/rx_compressed +What: /sys/class/net//statistics/rx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -32,7 +32,7 @@ Description: network device. This value might only be relevant for interfaces that support packet compression (e.g: PPP). -What: /sys/class//statistics/rx_crc_errors +What: /sys/class/net//statistics/rx_crc_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -41,7 +41,7 @@ Description: by this network device. Note that the specific meaning might depend on the MAC layer used by the interface. -What: /sys/class//statistics/rx_dropped +What: /sys/class/net//statistics/rx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -51,7 +51,7 @@ Description: packet processing. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_errors +What: /sys/class/net//statistics/rx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -59,7 +59,7 @@ Description: Indicates the number of receive errors on this network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_fifo_errors +What: /sys/class/net//statistics/rx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -68,7 +68,7 @@ Description: network device. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_frame_errors +What: /sys/class/net//statistics/rx_frame_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -78,7 +78,7 @@ Description: on the MAC layer protocol used. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_length_errors +What: /sys/class/net//statistics/rx_length_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -87,7 +87,7 @@ Description: error, oversized or undersized. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_missed_errors +What: /sys/class/net//statistics/rx_missed_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -96,7 +96,7 @@ Description: due to lack of capacity in the receive side. See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_nohandler +What: /sys/class/net//statistics/rx_nohandler Date: February 2016 KernelVersion: 4.6 Contact: netdev@vger.kernel.org @@ -104,7 +104,7 @@ Description: Indicates the number of received packets that were dropped on an inactive device by the network core. -What: /sys/class//statistics/rx_over_errors +What: /sys/class/net//statistics/rx_over_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -114,7 +114,7 @@ Description: (e.g: larger than MTU). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/rx_packets +What: /sys/class/net//statistics/rx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -122,7 +122,7 @@ Description: Indicates the total number of good packets received by this network device. -What: /sys/class//statistics/tx_aborted_errors +What: /sys/class/net//statistics/tx_aborted_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -132,7 +132,7 @@ Description: a medium collision). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_bytes +What: /sys/class/net//statistics/tx_bytes Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -143,7 +143,7 @@ Description: transmitted packets or all packets that have been queued for transmission. -What: /sys/class//statistics/tx_carrier_errors +What: /sys/class/net//statistics/tx_carrier_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -152,7 +152,7 @@ Description: because of carrier errors (e.g: physical link down). See the network driver for the exact meaning of this value. -What: /sys/class//statistics/tx_compressed +What: /sys/class/net//statistics/tx_compressed Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -161,7 +161,7 @@ Description: this might only be relevant for devices that support compression (e.g: PPP). -What: /sys/class//statistics/tx_dropped +What: /sys/class/net//statistics/tx_dropped Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -170,7 +170,7 @@ Description: See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_errors +What: /sys/class/net//statistics/tx_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -179,7 +179,7 @@ Description: a network device. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_fifo_errors +What: /sys/class/net//statistics/tx_fifo_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -188,7 +188,7 @@ Description: FIFO error. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_heartbeat_errors +What: /sys/class/net//statistics/tx_heartbeat_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -197,7 +197,7 @@ Description: reported as heartbeat errors. See the driver for the exact reasons as to why the packets were dropped. -What: /sys/class//statistics/tx_packets +What: /sys/class/net//statistics/tx_packets Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org @@ -206,7 +206,7 @@ Description: device. See the driver for whether this reports the number of all attempted or successful transmissions. -What: /sys/class//statistics/tx_window_errors +What: /sys/class/net//statistics/tx_window_errors Date: April 2005 KernelVersion: 2.6.12 Contact: netdev@vger.kernel.org -- GitLab From 551539a8606e28cb2a130f8ef3e9834235b456c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 11 Feb 2024 22:27:35 +0100 Subject: [PATCH 0961/1405] ASoC: rt5645: Make LattePanda board DMI match more precise The DMI strings used for the LattePanda board DMI quirks are very generic. Using the dmidecode database from https://linux-hardware.org/ shows that the chosen DMI strings also match the following 2 laptops which also have a rt5645 codec: Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04 Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934 All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS version DMI strings: DF-BI-7-S70CR100-* DF-BI-7-S70CR110-* DF-BI-7-S70CR200-* LP-BS-7-S70CR700-* See e.g. https://linux-hardware.org/?computer=D98250A817C0 Add a partial (non exact) DMI match on this string to make the LattePanda board DMI match more precise to avoid false-positive matches. Signed-off-by: Hans de Goede Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 0cc2fa131d483..69de78ad5da85 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3838,6 +3838,16 @@ static const struct dmi_system_id dmi_platform_data[] = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"), + /* + * Above strings are too generic, LattePanda BIOS versions for + * all 4 hw revisions are: + * DF-BI-7-S70CR100-* + * DF-BI-7-S70CR110-* + * DF-BI-7-S70CR200-* + * LP-BS-7-S70CR700-* + * Do a partial match for S70CR to avoid false positive matches. + */ + DMI_MATCH(DMI_BIOS_VERSION, "S70CR"), }, .driver_data = (void *)&lattepanda_board_platform_data, }, -- GitLab From d6755a53b8dde434220a164c756190345772843a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 11 Feb 2024 22:27:36 +0100 Subject: [PATCH 0962/1405] ASoC: rt5645: Add DMI quirk for inverted jack-detect on MeeGoPad T8 The MeeGoPad T8 uses the standard rt5645 jd_mode=3 setting for jack-detect, but the used jack connector outputs an inverted jack-detect signal. Add a DMI quirk for this. Signed-off-by: Hans de Goede Link: https://msgid.link/r/20240211212736.179605-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 69de78ad5da85..20191a4473c2d 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3693,6 +3693,11 @@ static const struct rt5645_platform_data jd_mode3_monospk_platform_data = { .mono_speaker = true, }; +static const struct rt5645_platform_data jd_mode3_inv_data = { + .jd_mode = 3, + .inv_jd1_1 = true, +}; + static const struct rt5645_platform_data jd_mode3_platform_data = { .jd_mode = 3, }; @@ -3882,6 +3887,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&intel_braswell_platform_data, }, + { + .ident = "Meegopad T08", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"), + DMI_MATCH(DMI_BOARD_VERSION, "V1.1"), + }, + .driver_data = (void *)&jd_mode3_inv_data, + }, { } }; -- GitLab From e56c671c2272d939d48a66be7e73b92b74c560c2 Mon Sep 17 00:00:00 2001 From: Vaishnav Achath Date: Mon, 12 Feb 2024 17:30:49 +0530 Subject: [PATCH 0963/1405] spi: omap2-mcspi: Revert FIFO support without DMA MCSPI controller have few limitations regarding the transaction size when the FIFO buffer is enabled and the WCNT feature is used to find the end of word, in this case if WCNT is not a multiple of the FIFO Almost Empty Level (AEL), then the FIFO empty event is not generated correctly. In addition to this limitation, few other unknown sequence of events that causes the FIFO empty status to not reflect the exact status were found when FIFO is being used without DMA enabled during extended testing in AM65x platform. Till the exact root cause is found and fixed, revert the FIFO support without DMA. See J721E Technical Reference Manual (SPRUI1C), section 12.1.5 for further details: http://www.ti.com/lit/pdf/spruil1 This reverts commit 75223bbea840e ("spi: omap2-mcspi: Add FIFO support without DMA") Signed-off-by: Vaishnav Achath Link: https://msgid.link/r/20240212120049.438495-1-vaishnav.a@ti.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 137 ++-------------------------------- 1 file changed, 8 insertions(+), 129 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index a0c9fea908f55..ddf1c684bcc7d 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -53,8 +53,6 @@ /* per-register bitmasks: */ #define OMAP2_MCSPI_IRQSTATUS_EOW BIT(17) -#define OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY BIT(0) -#define OMAP2_MCSPI_IRQSTATUS_RX0_FULL BIT(2) #define OMAP2_MCSPI_MODULCTRL_SINGLE BIT(0) #define OMAP2_MCSPI_MODULCTRL_MS BIT(2) @@ -293,7 +291,7 @@ static void omap2_mcspi_set_mode(struct spi_controller *ctlr) } static void omap2_mcspi_set_fifo(const struct spi_device *spi, - struct spi_transfer *t, int enable, int dma_enabled) + struct spi_transfer *t, int enable) { struct spi_controller *ctlr = spi->controller; struct omap2_mcspi_cs *cs = spi->controller_state; @@ -314,28 +312,20 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH / 2; else max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH; - if (dma_enabled) - wcnt = t->len / bytes_per_word; - else - wcnt = 0; + + wcnt = t->len / bytes_per_word; if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT) goto disable_fifo; xferlevel = wcnt << 16; if (t->rx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFER; - if (dma_enabled) - xferlevel |= (bytes_per_word - 1) << 8; - else - xferlevel |= (max_fifo_depth - 1) << 8; + xferlevel |= (bytes_per_word - 1) << 8; } if (t->tx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFET; - if (dma_enabled) - xferlevel |= bytes_per_word - 1; - else - xferlevel |= (max_fifo_depth - 1); + xferlevel |= bytes_per_word - 1; } mcspi_write_reg(ctlr, OMAP2_MCSPI_XFERLEVEL, xferlevel); @@ -892,113 +882,6 @@ omap2_mcspi_txrx_pio(struct spi_device *spi, struct spi_transfer *xfer) return count - c; } -static unsigned -omap2_mcspi_txrx_piofifo(struct spi_device *spi, struct spi_transfer *xfer) -{ - struct omap2_mcspi_cs *cs = spi->controller_state; - struct omap2_mcspi *mcspi; - unsigned int count, c; - unsigned int iter, cwc; - int last_request; - void __iomem *base = cs->base; - void __iomem *tx_reg; - void __iomem *rx_reg; - void __iomem *chstat_reg; - void __iomem *irqstat_reg; - int word_len, bytes_per_word; - u8 *rx; - const u8 *tx; - - mcspi = spi_controller_get_devdata(spi->controller); - count = xfer->len; - c = count; - word_len = cs->word_len; - bytes_per_word = mcspi_bytes_per_word(word_len); - - /* - * We store the pre-calculated register addresses on stack to speed - * up the transfer loop. - */ - tx_reg = base + OMAP2_MCSPI_TX0; - rx_reg = base + OMAP2_MCSPI_RX0; - chstat_reg = base + OMAP2_MCSPI_CHSTAT0; - irqstat_reg = base + OMAP2_MCSPI_IRQSTATUS; - - if (c < (word_len >> 3)) - return 0; - - rx = xfer->rx_buf; - tx = xfer->tx_buf; - - do { - /* calculate number of words in current iteration */ - cwc = min((unsigned int)mcspi->fifo_depth / bytes_per_word, - c / bytes_per_word); - last_request = cwc != (mcspi->fifo_depth / bytes_per_word); - if (tx) { - if (mcspi_wait_for_reg_bit(irqstat_reg, - OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY) < 0) { - dev_err(&spi->dev, "TX Empty timed out\n"); - goto out; - } - writel_relaxed(OMAP2_MCSPI_IRQSTATUS_TX0_EMPTY, irqstat_reg); - - for (iter = 0; iter < cwc; iter++, tx += bytes_per_word) { - if (bytes_per_word == 1) - writel_relaxed(*tx, tx_reg); - else if (bytes_per_word == 2) - writel_relaxed(*((u16 *)tx), tx_reg); - else if (bytes_per_word == 4) - writel_relaxed(*((u32 *)tx), tx_reg); - } - } - - if (rx) { - if (!last_request && - mcspi_wait_for_reg_bit(irqstat_reg, - OMAP2_MCSPI_IRQSTATUS_RX0_FULL) < 0) { - dev_err(&spi->dev, "RX_FULL timed out\n"); - goto out; - } - writel_relaxed(OMAP2_MCSPI_IRQSTATUS_RX0_FULL, irqstat_reg); - - for (iter = 0; iter < cwc; iter++, rx += bytes_per_word) { - if (last_request && - mcspi_wait_for_reg_bit(chstat_reg, - OMAP2_MCSPI_CHSTAT_RXS) < 0) { - dev_err(&spi->dev, "RXS timed out\n"); - goto out; - } - if (bytes_per_word == 1) - *rx = readl_relaxed(rx_reg); - else if (bytes_per_word == 2) - *((u16 *)rx) = readl_relaxed(rx_reg); - else if (bytes_per_word == 4) - *((u32 *)rx) = readl_relaxed(rx_reg); - } - } - - if (last_request) { - if (mcspi_wait_for_reg_bit(chstat_reg, - OMAP2_MCSPI_CHSTAT_EOT) < 0) { - dev_err(&spi->dev, "EOT timed out\n"); - goto out; - } - if (mcspi_wait_for_reg_bit(chstat_reg, - OMAP2_MCSPI_CHSTAT_TXFFE) < 0) { - dev_err(&spi->dev, "TXFFE timed out\n"); - goto out; - } - omap2_mcspi_set_enable(spi, 0); - } - c -= cwc * bytes_per_word; - } while (c >= bytes_per_word); - -out: - omap2_mcspi_set_enable(spi, 1); - return count - c; -} - static u32 omap2_mcspi_calc_divisor(u32 speed_hz, u32 ref_clk_hz) { u32 div; @@ -1323,9 +1206,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr, if ((mcspi_dma->dma_rx && mcspi_dma->dma_tx) && ctlr->cur_msg_mapped && ctlr->can_dma(ctlr, spi, t)) - omap2_mcspi_set_fifo(spi, t, 1, 1); - else if (t->len > OMAP2_MCSPI_MAX_FIFODEPTH) - omap2_mcspi_set_fifo(spi, t, 1, 0); + omap2_mcspi_set_fifo(spi, t, 1); omap2_mcspi_set_enable(spi, 1); @@ -1338,8 +1219,6 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr, ctlr->cur_msg_mapped && ctlr->can_dma(ctlr, spi, t)) count = omap2_mcspi_txrx_dma(spi, t); - else if (mcspi->fifo_depth > 0) - count = omap2_mcspi_txrx_piofifo(spi, t); else count = omap2_mcspi_txrx_pio(spi, t); @@ -1352,7 +1231,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr, omap2_mcspi_set_enable(spi, 0); if (mcspi->fifo_depth > 0) - omap2_mcspi_set_fifo(spi, t, 0, 0); + omap2_mcspi_set_fifo(spi, t, 0); out: /* Restore defaults if they were overriden */ @@ -1375,7 +1254,7 @@ static int omap2_mcspi_transfer_one(struct spi_controller *ctlr, omap2_mcspi_set_cs(spi, !(spi->mode & SPI_CS_HIGH)); if (mcspi->fifo_depth > 0 && t) - omap2_mcspi_set_fifo(spi, t, 0, 0); + omap2_mcspi_set_fifo(spi, t, 0); return status; } -- GitLab From 8f44e3808200c1434c26ef459722f88f48b306df Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 12 Feb 2024 10:20:27 +0200 Subject: [PATCH 0964/1405] spi: intel-pci: Add support for Lunar Lake-M SPI serial flash Add Intel Lunar Lake-M PCI ID to the driver list of supported devices. This is the same controller found in previous generations. Signed-off-by: Mika Westerberg Link: https://msgid.link/r/20240212082027.2462849-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 07d20ca1164c3..4337ca51d7aa2 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -85,6 +85,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa2a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa3a4), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0xa823), (unsigned long)&cnl_info }, { }, }; MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids); -- GitLab From 2f0dbb24f78a333433a2b875c0b76bf55c119cd4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 11 Feb 2024 16:58:17 +0000 Subject: [PATCH 0965/1405] regmap: kunit: Ensure that changed bytes are actually different During the cache sync test we verify that values we expect to have been written only to the cache do not appear in the hardware. This works most of the time but since we randomly generate both the original and new values there is a low probability that these values may actually be the same. Wrap get_random_bytes() to ensure that the values are different, there are other tests which should have similar verification that we actually changed something. While we're at it refactor the test to use three changed values rather than attempting to use one of them twice, that just complicates checking that our new values are actually new. We use random generation to try to avoid data dependencies in the tests. Reported-by: Guenter Roeck Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Mark Brown Link: https://msgid.link/r/20240211-regmap-kunit-random-change-v3-1-e387a9ea4468@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-kunit.c | 54 +++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c index 4eb18f5d32655..0d957c5f1bcc9 100644 --- a/drivers/base/regmap/regmap-kunit.c +++ b/drivers/base/regmap/regmap-kunit.c @@ -9,6 +9,23 @@ #define BLOCK_TEST_SIZE 12 +static void get_changed_bytes(void *orig, void *new, size_t size) +{ + char *o = orig; + char *n = new; + int i; + + get_random_bytes(new, size); + + /* + * This could be nicer and more efficient but we shouldn't + * super care. + */ + for (i = 0; i < size; i++) + while (n[i] == o[i]) + get_random_bytes(&n[i], 1); +} + static const struct regmap_config test_regmap_config = { .max_register = BLOCK_TEST_SIZE, .reg_stride = 1, @@ -1252,7 +1269,7 @@ static void raw_sync(struct kunit *test) struct regmap *map; struct regmap_config config; struct regmap_ram_data *data; - u16 val[2]; + u16 val[3]; u16 *hw_buf; unsigned int rval; int i; @@ -1266,17 +1283,13 @@ static void raw_sync(struct kunit *test) hw_buf = (u16 *)data->vals; - get_random_bytes(&val, sizeof(val)); + get_changed_bytes(&hw_buf[2], &val[0], sizeof(val)); /* Do a regular write and a raw write in cache only mode */ regcache_cache_only(map, true); - KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val))); - if (config.val_format_endian == REGMAP_ENDIAN_BIG) - KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6, - be16_to_cpu(val[0]))); - else - KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6, - le16_to_cpu(val[0]))); + KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, + sizeof(u16) * 2)); + KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 4, val[2])); /* We should read back the new values, and defaults for the rest */ for (i = 0; i < config.max_register + 1; i++) { @@ -1285,24 +1298,34 @@ static void raw_sync(struct kunit *test) switch (i) { case 2: case 3: - case 6: if (config.val_format_endian == REGMAP_ENDIAN_BIG) { KUNIT_EXPECT_EQ(test, rval, - be16_to_cpu(val[i % 2])); + be16_to_cpu(val[i - 2])); } else { KUNIT_EXPECT_EQ(test, rval, - le16_to_cpu(val[i % 2])); + le16_to_cpu(val[i - 2])); } break; + case 4: + KUNIT_EXPECT_EQ(test, rval, val[i - 2]); + break; default: KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval); break; } } + + /* + * The value written via _write() was translated by the core, + * translate the original copy for comparison purposes. + */ + if (config.val_format_endian == REGMAP_ENDIAN_BIG) + val[2] = cpu_to_be16(val[2]); + else + val[2] = cpu_to_le16(val[2]); /* The values should not appear in the "hardware" */ - KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], val, sizeof(val)); - KUNIT_EXPECT_MEMNEQ(test, &hw_buf[6], val, sizeof(u16)); + KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], &val[0], sizeof(val)); for (i = 0; i < config.max_register + 1; i++) data->written[i] = false; @@ -1313,8 +1336,7 @@ static void raw_sync(struct kunit *test) KUNIT_EXPECT_EQ(test, 0, regcache_sync(map)); /* The values should now appear in the "hardware" */ - KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val)); - KUNIT_EXPECT_MEMEQ(test, &hw_buf[6], val, sizeof(u16)); + KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], &val[0], sizeof(val)); regmap_exit(map); } -- GitLab From 2c80a2b715df75881359d07dbaacff8ad411f40e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:22:17 +0100 Subject: [PATCH 0966/1405] nouveau/svm: fix kvcalloc() argument order The conversion to kvcalloc() mixed up the object size and count arguments, causing a warning: drivers/gpu/drm/nouveau/nouveau_svm.c: In function 'nouveau_svm_fault_buffer_ctor': drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: error: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 1010 | buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); | ^ drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: note: earlier argument should specify number of elements, later size of each element The behavior is still correct aside from the warning, but fixing it avoids the warnings and can help the compiler track the individual objects better. Fixes: 71e4bbca070e ("nouveau/svm: Use kvcalloc() instead of kvzalloc()") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240212112230.1117284-1-arnd@kernel.org --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index cc03e0c22ff3f..5e4565c5011a9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; -- GitLab From f7fe85b229bc30cb5dc95b4e9015a601c9e3a8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20T=C5=91k=C3=A9s?= Date: Sat, 10 Feb 2024 21:36:38 +0200 Subject: [PATCH 0967/1405] ASoC: amd: yc: Fix non-functional mic on Lenovo 82UU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like many other models, the Lenovo 82UU (Yoga Slim 7 Pro 14ARH7) needs a quirk entry for the internal microphone to function. Signed-off-by: Attila Tőkés Link: https://msgid.link/r/20240210193638.144028-1-attitokes@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 80ad60d485ea0..cc231185d72c3 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -234,6 +234,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82UG"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UU"), + } + }, { .driver_data = &acp6x_card, .matches = { -- GitLab From 83ef106fa732aea8558253641cd98e8a895604d7 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya Date: Mon, 12 Feb 2024 18:22:39 +0530 Subject: [PATCH 0968/1405] i2c: qcom-geni: Correct I2C TRE sequence For i2c read operation in GSI mode, we are getting timeout due to malformed TRE basically incorrect TRE sequence in gpi(drivers/dma/qcom/gpi.c) driver. I2C driver has geni_i2c_gpi(I2C_WRITE) function which generates GO TRE and geni_i2c_gpi(I2C_READ)generates DMA TRE. Hence to generate GO TRE before DMA TRE, we should move geni_i2c_gpi(I2C_WRITE) before geni_i2c_gpi(I2C_READ) inside the I2C GSI mode transfer function i.e. geni_i2c_gpi_xfer(). TRE stands for Transfer Ring Element - which is basically an element with size of 4 words. It contains all information like slave address, clk divider, dma address value data size etc). Mainly we have 3 TREs(Config, GO and DMA tre). - CONFIG TRE : consists of internal register configuration which is required before start of the transfer. - DMA TRE : contains DDR/Memory address, called as DMA descriptor. - GO TRE : contains Transfer directions, slave ID, Delay flags, Length of the transfer. I2c driver calls GPI driver API to config each TRE depending on the protocol. For read operation tre sequence will be as below which is not aligned to hardware programming guide. - CONFIG tre - DMA tre - GO tre As per Qualcomm's internal Hardware Programming Guide, we should configure TREs in below sequence for any RX only transfer. - CONFIG tre - GO tre - DMA tre Fixes: d8703554f4de ("i2c: qcom-geni: Add support for GPI DMA") Reviewed-by: Andi Shyti Reviewed-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue # qrb5165-rb5 Co-developed-by: Mukesh Kumar Savaliya Signed-off-by: Mukesh Kumar Savaliya Signed-off-by: Viken Dadhaniya Reviewed-by: Dmitry Baryshkov Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-qcom-geni.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0d2e7171e3a6f..da94df466e83c 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -613,20 +613,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); + if (ret) + goto err; + if (msgs[i].flags & I2C_M_RD) { ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); if (ret) goto err; - } - - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); - if (ret) - goto err; - if (msgs[i].flags & I2C_M_RD) dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); -- GitLab From 7d8c67dd5d4f2ed1907e192c73dd948d35145641 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 12 Jan 2024 10:59:03 -0800 Subject: [PATCH 0969/1405] xen/xenbus: document will_handle argument for xenbus_watch_path() Commit 2e85d32b1c86 ("xen/xenbus: Add 'will_handle' callback support in xenbus_watch_path()") added will_handle argument to xenbus_watch_path() and its wrapper, xenbus_watch_pathfmt(), but didn't document it on the kerneldoc comments of the function. This is causing warnings that reported by kernel test robot. Add the documentation to fix it. Fixes: 2e85d32b1c86 ("xen/xenbus: Add 'will_handle' callback support in xenbus_watch_path()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401121154.FI8jDGun-lkp@intel.com/ Signed-off-by: SeongJae Park Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240112185903.83737-1-sj@kernel.org Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_client.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 32835b4b9bc50..51b3124b0d56c 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -116,14 +116,15 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); * @dev: xenbus device * @path: path to watch * @watch: watch to register + * @will_handle: events queuing determine callback * @callback: callback to register * * Register a @watch on the given path, using the given xenbus_watch structure - * for storage, and the given @callback function as the callback. On success, - * the given @path will be saved as @watch->node, and remains the - * caller's to free. On error, @watch->node will - * be NULL, the device will switch to %XenbusStateClosing, and the error will - * be saved in the store. + * for storage, @will_handle function as the callback to determine if each + * event need to be queued, and the given @callback function as the callback. + * On success, the given @path will be saved as @watch->node, and remains the + * caller's to free. On error, @watch->node will be NULL, the device will + * switch to %XenbusStateClosing, and the error will be saved in the store. * * Returns: %0 on success or -errno on error */ @@ -158,11 +159,13 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path * @dev: xenbus device * @watch: watch to register + * @will_handle: events queuing determine callback * @callback: callback to register * @pathfmt: format of path to watch * * Register a watch on the given @path, using the given xenbus_watch - * structure for storage, and the given @callback function as the + * structure for storage, @will_handle function as the callback to determine if + * each event need to be queued, and the given @callback function as the * callback. On success, the watched path (@path/@path2) will be saved * as @watch->node, and becomes the caller's to kfree(). * On error, watch->node will be NULL, so the caller has nothing to -- GitLab From 4508ec17357094e2075f334948393ddedbb75157 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 11 Feb 2024 20:19:30 -0300 Subject: [PATCH 0970/1405] smb: client: set correct id, uid and cruid for multiuser automounts When uid, gid and cruid are not specified, we need to dynamically set them into the filesystem context used for automounting otherwise they'll end up reusing the values from the parent mount. Fixes: 9fd29a5bae6e ("cifs: use fs_context for automounts") Reported-by: Shane Nehring Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2259257 Cc: stable@vger.kernel.org # 6.2+ Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/namespace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index a6968573b775e..4a517b280f2b7 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page) return s; } +static void fs_context_set_ids(struct smb3_fs_context *ctx) +{ + kuid_t uid = current_fsuid(); + kgid_t gid = current_fsgid(); + + if (ctx->multiuser) { + if (!ctx->uid_specified) + ctx->linux_uid = uid; + if (!ctx->gid_specified) + ctx->linux_gid = gid; + } + if (!ctx->cruid_specified) + ctx->cred_uid = uid; +} + /* * Create a vfsmount that we can automount */ @@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path) tmp.leaf_fullpath = NULL; tmp.UNC = tmp.prepath = NULL; tmp.dfs_root_ses = NULL; + fs_context_set_ids(&tmp); rc = smb3_fs_context_dup(ctx, &tmp); if (rc) { -- GitLab From 8bde59b20de06339d598e8b05e5195f7c631c38b Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sun, 11 Feb 2024 20:19:31 -0300 Subject: [PATCH 0971/1405] smb: client: handle path separator of created SMB symlinks Convert path separator to CIFS_DIR_SEP(cifs_sb) from symlink target before sending it over the wire otherwise the created SMB symlink may become innaccesible from server side. Fixes: 514d793e27a3 ("smb: client: allow creating symlinks via reparse points") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 6b3c384ead0d8..4695433fcf397 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5217,7 +5217,7 @@ static int smb2_create_reparse_symlink(const unsigned int xid, struct inode *new; struct kvec iov; __le16 *path; - char *sym; + char *sym, sep = CIFS_DIR_SEP(cifs_sb); u16 len, plen; int rc = 0; @@ -5231,7 +5231,8 @@ static int smb2_create_reparse_symlink(const unsigned int xid, .symlink_target = sym, }; - path = cifs_convert_path_to_utf16(symname, cifs_sb); + convert_delimiter(sym, sep); + path = cifs_convert_path_to_utf16(sym, cifs_sb); if (!path) { rc = -ENOMEM; goto out; @@ -5254,7 +5255,10 @@ static int smb2_create_reparse_symlink(const unsigned int xid, buf->PrintNameLength = cpu_to_le16(plen); memcpy(buf->PathBuffer, path, plen); buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0); + if (*sym != sep) + buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE); + convert_delimiter(sym, '/'); iov.iov_base = buf; iov.iov_len = len; new = smb2_get_reparse_inode(&data, inode->i_sb, xid, -- GitLab From 3693bb4465e6e32a204a5b86d3ec7e6b9f7e67c2 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Fri, 19 Jan 2024 17:49:48 +0800 Subject: [PATCH 0972/1405] x86/xen: Add some null pointer checking to smp.c kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Signed-off-by: Kunwu Chan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401161119.iof6BQsf-lkp@intel.com/ Suggested-by: Markus Elfring Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240119094948.275390-1-chentao@kylinos.cn Signed-off-by: Juergen Gross --- arch/x86/xen/smp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4b0d6fff88de5..1fb9a1644d944 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu) char *resched_name, *callfunc_name, *debug_name; resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + if (!resched_name) + goto fail_mem; per_cpu(xen_resched_irq, cpu).name = resched_name; rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, @@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu) per_cpu(xen_resched_irq, cpu).irq = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + if (!callfunc_name) + goto fail_mem; per_cpu(xen_callfunc_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, cpu, @@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu) if (!xen_fifo_events) { debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); + if (!debug_name) + goto fail_mem; + per_cpu(xen_debug_irq, cpu).name = debug_name; rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu) } callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); + if (!callfunc_name) + goto fail_mem; + per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name; rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, cpu, @@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu) return 0; + fail_mem: + rc = -ENOMEM; fail: xen_smp_intr_free(cpu); return rc; -- GitLab From d55347bfe4e66dce2e1e7501e5492f4af3e315f8 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 11 Feb 2024 08:08:37 -0800 Subject: [PATCH 0973/1405] MIPS: Add 'memory' clobber to csum_ipv6_magic() inline assembler After 'lib: checksum: Use aligned accesses for ip_fast_csum and csum_ipv6_magic tests' was applied, the test_csum_ipv6_magic unit test started failing for all mips platforms, both little and bit endian. Oddly enough, adding debug code into test_csum_ipv6_magic() made the problem disappear. The gcc manual says: "The "memory" clobber tells the compiler that the assembly code performs memory reads or writes to items other than those listed in the input and output operands (for example, accessing the memory pointed to by one of the input parameters) " This is definitely the case for csum_ipv6_magic(). Indeed, adding the 'memory' clobber fixes the problem. Cc: Charlie Jenkins Cc: Palmer Dabbelt Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Guenter Roeck Reviewed-by: Charlie Jenkins Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h index 4044eaf989ac7..0921ddda11a4b 100644 --- a/arch/mips/include/asm/checksum.h +++ b/arch/mips/include/asm/checksum.h @@ -241,7 +241,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, " .set pop" : "=&r" (sum), "=&r" (tmp) : "r" (saddr), "r" (daddr), - "0" (htonl(len)), "r" (htonl(proto)), "r" (sum)); + "0" (htonl(len)), "r" (htonl(proto)), "r" (sum) + : "memory"); return csum_fold(sum); } -- GitLab From 11ba1728be3edb6928791f4c622f154ebe228ae6 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:26 +0000 Subject: [PATCH 0974/1405] ptrace: Introduce exception_ip arch hook On architectures with delay slot, architecture level instruction pointer (or program counter) in pt_regs may differ from where exception was triggered. Introduce exception_ip hook to invoke architecture code and determine actual instruction pointer to the exception. Link: https://lore.kernel.org/lkml/00d1b813-c55f-4365-8d81-d70258e10b16@app.fastmail.com/ Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 2 ++ arch/mips/kernel/ptrace.c | 7 +++++++ include/linux/ptrace.h | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index daf3cf244ea97..701a233583c2c 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -154,6 +154,8 @@ static inline long regs_return_value(struct pt_regs *regs) } #define instruction_pointer(regs) ((regs)->cp0_epc) +extern unsigned long exception_ip(struct pt_regs *regs); +#define exception_ip(regs) exception_ip(regs) #define profile_pc(regs) instruction_pointer(regs) extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index d9df543f7e2c4..59288c13b581b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -48,6 +49,12 @@ #define CREATE_TRACE_POINTS #include +unsigned long exception_ip(struct pt_regs *regs) +{ + return exception_epc(regs); +} +EXPORT_SYMBOL(exception_ip); + /* * Called by kernel/ptrace.c when detaching.. * diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index eaaef3ffec221..90507d4afcd6d 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -393,6 +393,10 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif +#ifndef exception_ip +#define exception_ip(x) instruction_pointer(x) +#endif + extern int task_current_syscall(struct task_struct *target, struct syscall_info *info); extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact); -- GitLab From 9d6e21ddf20293b3880ae55b9d14de91c5891c59 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:27 +0000 Subject: [PATCH 0975/1405] MIPS: Clear Cause.BD in instruction_pointer_set Clear Cause.BD after we use instruction_pointer_set to override EPC. This can prevent exception_epc check against instruction code at new return address. It won't be considered as "in delay slot" after epc being overridden anyway. Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/ptrace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 701a233583c2c..d14d0e37ad02d 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -60,6 +60,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { regs->cp0_epc = val; + regs->cp0_cause &= ~CAUSEF_BD; } /* Query offset/name of register from its name/offset */ -- GitLab From 8fa5070833886268e4fb646daaca99f725b378e9 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Fri, 2 Feb 2024 12:30:28 +0000 Subject: [PATCH 0976/1405] mm/memory: Use exception ip to search exception tables On architectures with delay slot, instruction_pointer() may differ from where exception was triggered. Use exception_ip we just introduced to search exception tables to get rid of the problem. Fixes: 4bce37a68ff8 ("mips/mm: Convert to using lock_mm_and_find_vma()") Reported-by: Xi Ruoyao Link: https://lore.kernel.org/r/75e9fd7b08562ad9b456a5bdaacb7cc220311cc9.camel@xry111.site/ Suggested-by: Linus Torvalds Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- mm/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 89bcae0b224d6..15f8b10ea17c4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5478,7 +5478,7 @@ static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs return true; if (regs && !user_mode(regs)) { - unsigned long ip = instruction_pointer(regs); + unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } @@ -5503,7 +5503,7 @@ static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_r { mmap_read_unlock(mm); if (regs && !user_mode(regs)) { - unsigned long ip = instruction_pointer(regs); + unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } -- GitLab From d794734c9bbfe22f86686dc2909c25f5ffe1a572 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Fri, 26 Jan 2024 10:48:41 -0600 Subject: [PATCH 0977/1405] x86/mm/ident_map: Use gbpages only where full GB page should be mapped. When ident_pud_init() uses only gbpages to create identity maps, large ranges of addresses not actually requested can be included in the resulting table; a 4K request will map a full GB. On UV systems, this ends up including regions that will cause hardware to halt the system if accessed (these are marked "reserved" by BIOS). Even processor speculation into these regions is enough to trigger the system halt. Only use gbpages when map creation requests include the full GB page of space. Fall back to using smaller 2M pages when only portions of a GB page are included in the request. No attempt is made to coalesce mapping requests. If a request requires a map entry at the 2M (pmd) level, subsequent mapping requests within the same 1G region will also be at the pmd level, even if adjacent or overlapping such requests could have been combined to map a full gbpage. Existing usage starts with larger regions and then adds smaller regions, so this should not have any great consequence. [ dhansen: fix up comment formatting, simplifty changelog ] Signed-off-by: Steve Wahl Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240126164841.170866-1-steve.wahl%40hpe.com --- arch/x86/mm/ident_map.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a72..f50cc210a9818 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { - pud_t pudval; + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; - if (pud_present(*pud)) - continue; + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { + pud_t pudval; - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; -- GitLab From 73d9629e1c8c1982f13688c4d1019c3994647ccc Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 8 Feb 2024 10:03:33 -0800 Subject: [PATCH 0978/1405] i40e: Do not allow untrusted VF to remove administratively set MAC Currently when PF administratively sets VF's MAC address and the VF is put down (VF tries to delete all MACs) then the MAC is removed from MAC filters and primary VF MAC is zeroed. Do not allow untrusted VF to remove primary MAC when it was set administratively by PF. Reproducer: 1) Create VF 2) Set VF interface up 3) Administratively set the VF's MAC 4) Put VF interface down [root@host ~]# echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs [root@host ~]# ip link set enp2s0f0v0 up [root@host ~]# ip link set enp2s0f0 vf 0 mac fe:6c:b5:da:c7:7d [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether fe:6c:b5:da:c7:7d brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off [root@host ~]# ip link set enp2s0f0v0 down [root@host ~]# ip link show enp2s0f0 23: enp2s0f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 3c:ec:ef:b7:dd:04 brd ff:ff:ff:ff:ff:ff vf 0 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff, spoof checking on, link-state auto, trust off Fixes: 700bbf6c1f9e ("i40e: allow VF to remove any MAC filter") Fixes: ceb29474bbbc ("i40e: Add support for VF to specify its primary MAC address") Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240208180335.1844996-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 908cdbd3ec5d4..b34c717708874 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2848,6 +2848,24 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) (u8 *)&stats, sizeof(stats)); } +/** + * i40e_can_vf_change_mac + * @vf: pointer to the VF info + * + * Return true if the VF is allowed to change its MAC filters, false otherwise + */ +static bool i40e_can_vf_change_mac(struct i40e_vf *vf) +{ + /* If the VF MAC address has been set administratively (via the + * ndo_set_vf_mac command), then deny permission to the VF to + * add/delete unicast MAC addresses, unless the VF is trusted + */ + if (vf->pf_set_mac && !vf->trusted) + return false; + + return true; +} + #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2907,8 +2925,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) && - !is_multicast_ether_addr(addr) && vf->pf_set_mac && + if (!i40e_can_vf_change_mac(vf) && + !is_multicast_ether_addr(addr) && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); @@ -3114,19 +3132,29 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) ret = -EINVAL; goto error_param; } - if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr)) - was_unimac_deleted = true; } vsi = pf->vsi[vf->lan_vsi_idx]; spin_lock_bh(&vsi->mac_filter_hash_lock); /* delete addresses from the list */ - for (i = 0; i < al->num_elements; i++) + for (i = 0; i < al->num_elements; i++) { + const u8 *addr = al->list[i].addr; + + /* Allow to delete VF primary MAC only if it was not set + * administratively by PF or if VF is trusted. + */ + if (ether_addr_equal(addr, vf->default_lan_addr.addr) && + i40e_can_vf_change_mac(vf)) + was_unimac_deleted = true; + else + continue; + if (i40e_del_mac_filter(vsi, al->list[i].addr)) { ret = -EINVAL; spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; } + } spin_unlock_bh(&vsi->mac_filter_hash_lock); -- GitLab From c2b3ec36b422a331e153a9e40d14adcf82685cee Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Fri, 9 Feb 2024 16:25:11 +0300 Subject: [PATCH 0979/1405] selftests: net: ip_local_port_range: define IPPROTO_MPTCP Older glibc's netinet/in.h may leave IPPROTO_MPTCP undefined when building ip_local_port_range.c, that leads to "error: use of undeclared identifier 'IPPROTO_MPTCP'". Define IPPROTO_MPTCP in such cases, just like in other MPTCP selftests. Fixes: 122db5e3634b ("selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE") Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/netdev/CA+G9fYvGO5q4o_Td_kyQgYieXWKw6ktMa-Q0sBu6S-0y3w2aEQ@mail.gmail.com/ Signed-off-by: Maxim Galaganov Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20240209132512.254520-1-max@internet.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ip_local_port_range.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 0f217a1cc837d..6ebd58869a637 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -16,6 +16,10 @@ #define IP_LOCAL_PORT_RANGE 51 #endif +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + static __u32 pack_port_range(__u16 lo, __u16 hi) { return (hi << 16) | (lo << 0); -- GitLab From c73729b64bb692186da080602cd13612783f52ac Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 8 Nov 2023 17:01:03 +0100 Subject: [PATCH 0980/1405] i40e: Fix waiting for queues of all VSIs to be disabled The function i40e_pf_wait_queues_disabled() iterates all PF's VSIs up to 'pf->hw.func_caps.num_vsis' but this is incorrect because the real number of VSIs can be up to 'pf->num_alloc_vsi' that can be higher. Fix this loop. Fixes: 69129dc39fac ("i40e: Modify Tx disable wait flow in case of DCB reconfiguration") Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Reviewed-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6e7fd473abfd0..7a416e923b361 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5360,7 +5360,7 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; - for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { + for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) { ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) -- GitLab From 343bb39e1f25a73a38a03d98ca383495c0ed6e92 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 30 Nov 2023 20:31:34 +0100 Subject: [PATCH 0981/1405] i40e: Fix wrong mask used during DCB config Mask used for clearing PRTDCB_RETSTCC register in function i40e_dcb_hw_rx_ets_bw_config() is incorrect as there is used define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT instead of define I40E_PRTDCB_RETSTCC_ETSTC_MASK. The PRTDCB_RETSTCC register is used to configure whether ETS or strict priority is used as TSA in Rx for particular TC. In practice it means that once the register is set to use ETS as TSA then it is not possible to switch back to strict priority without CoreR reset. Fix the value in the clearing mask. Fixes: 90bc8e003be2 ("i40e: Add hardware configuration for software based DCB") Signed-off-by: Ivan Vecera Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_dcb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 9d88ed6105fd8..8db1eb0c1768c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1523,7 +1523,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, reg = rd32(hw, I40E_PRTDCB_RETSTCC(i)); reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK | I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK | - I40E_PRTDCB_RETSTCC_ETSTC_SHIFT); + I40E_PRTDCB_RETSTCC_ETSTC_MASK); reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_BWSHARE_MASK, bw_share[i]); reg |= FIELD_PREP(I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK, -- GitLab From 89a373e9131d4200038a0ef232dad80212209de0 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 6 Feb 2024 13:41:31 +0100 Subject: [PATCH 0982/1405] i40e: avoid double calling i40e_pf_rxq_wait() Currently, when interface is being brought down and i40e_vsi_stop_rings() is called, i40e_pf_rxq_wait() is called two times, which is wrong. To showcase this scenario, simplified call stack looks as follows: i40e_vsi_stop_rings() i40e_control wait rx_q() i40e_control_rx_q() i40e_pf_rxq_wait() i40e_vsi_wait_queues_disabled() i40e_pf_rxq_wait() // redundant call To fix this, let us s/i40e_control_wait_rx_q/i40e_control_rx_q within i40e_vsi_stop_rings(). Fixes: 65662a8dcdd0 ("i40e: Fix logic of disabling queues") Reviewed-by: Simon Horman Signed-off-by: Maciej Fijalkowski Reviewed-by: Ivan Vecera Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 7a416e923b361..06078c4d54e82 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4926,7 +4926,7 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int pf_q, err, q_end; + int pf_q, q_end; /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) @@ -4936,16 +4936,10 @@ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); - for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) { - err = i40e_control_wait_rx_q(pf, pf_q, false); - if (err) - dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d disable timeout\n", - vsi->seid, pf_q); - } + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + i40e_control_rx_q(pf, pf_q, false); msleep(I40E_DISABLE_TX_GAP_MSEC); - pf_q = vsi->base_queue; for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); -- GitLab From 6ed8187bb36c14f5ea91be0bf20117379df2d25a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 6 Feb 2024 13:41:32 +0100 Subject: [PATCH 0983/1405] i40e: take into account XDP Tx queues when stopping rings Seth reported that on his side XDP traffic can not survive a round of down/up against i40e interface. Dmesg output was telling us that we were not able to disable the very first XDP ring. That was due to the fact that in i40e_vsi_stop_rings() in a pre-work that is done before calling i40e_vsi_wait_queues_disabled(), XDP Tx queues were not taken into the account. To fix this, let us distinguish between Rx and Tx queue boundaries and take into the account XDP queues for Tx side. Reported-by: Seth Forshee Closes: https://lore.kernel.org/netdev/ZbkE7Ep1N1Ou17sA@do-x1extreme/ Fixes: 65662a8dcdd0 ("i40e: Fix logic of disabling queues") Tested-by: Seth Forshee Reviewed-by: Simon Horman Signed-off-by: Maciej Fijalkowski Reviewed-by: Ivan Vecera Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 06078c4d54e82..54eb55464e315 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4926,21 +4926,23 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int pf_q, q_end; + u32 pf_q, tx_q_end, rx_q_end; /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - q_end = vsi->base_queue + vsi->num_queue_pairs; - for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) - i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); + tx_q_end = vsi->base_queue + + vsi->alloc_queue_pairs * (i40e_enabled_xdp_vsi(vsi) ? 2 : 1); + for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, pf_q, false); - for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + rx_q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < rx_q_end; pf_q++) i40e_control_rx_q(pf, pf_q, false); msleep(I40E_DISABLE_TX_GAP_MSEC); - for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + for (pf_q = vsi->base_queue; pf_q < tx_q_end; pf_q++) wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); i40e_vsi_wait_queues_disabled(vsi); -- GitLab From 1692b9775e745f84b69dc8ad0075b0855a43db4e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 8 Feb 2024 09:48:27 +0000 Subject: [PATCH 0984/1405] net: stmmac: xgmac: use #define for string constants The cited commit introduces and uses the string constants dpp_tx_err and dpp_rx_err. These are assigned to constant fields of the array dwxgmac3_error_desc. It has been reported that on GCC 6 and 7.5.0 this results in warnings such as: .../dwxgmac2_core.c:836:20: error: initialiser element is not constant { true, "TDPES0", dpp_tx_err }, I have been able to reproduce this using: GCC 7.5.0, 8.4.0, 9.4.0 and 10.5.0. But not GCC 13.2.0. So it seems this effects older compilers but not newer ones. As Jon points out in his report, the minimum compiler supported by the kernel is GCC 5.1, so it does seem that this ought to be fixed. It is not clear to me what combination of 'const', if any, would address this problem. So this patch takes of using #defines for the string constants Compile tested only. Fixes: 46eba193d04f ("net: stmmac: xgmac: fix handling of DPP safety error for DMA channels") Reported-by: Jon Hunter Closes: https://lore.kernel.org/netdev/c25eb595-8d91-40ea-9f52-efa15ebafdbc@nvidia.com/ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402081135.lAxxBXHk-lkp@intel.com/ Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240208-xgmac-const-v1-1-e69a1eeabfc8@kernel.org Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 323c57f03c93c..1af2f89a0504a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -830,41 +830,42 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; -static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error"; -static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error"; +#define DPP_RX_ERR "Read Rx Descriptor Parity checker Error" +#define DPP_TX_ERR "Read Tx Descriptor Parity checker Error" + static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { - { true, "TDPES0", dpp_tx_err }, - { true, "TDPES1", dpp_tx_err }, - { true, "TDPES2", dpp_tx_err }, - { true, "TDPES3", dpp_tx_err }, - { true, "TDPES4", dpp_tx_err }, - { true, "TDPES5", dpp_tx_err }, - { true, "TDPES6", dpp_tx_err }, - { true, "TDPES7", dpp_tx_err }, - { true, "TDPES8", dpp_tx_err }, - { true, "TDPES9", dpp_tx_err }, - { true, "TDPES10", dpp_tx_err }, - { true, "TDPES11", dpp_tx_err }, - { true, "TDPES12", dpp_tx_err }, - { true, "TDPES13", dpp_tx_err }, - { true, "TDPES14", dpp_tx_err }, - { true, "TDPES15", dpp_tx_err }, - { true, "RDPES0", dpp_rx_err }, - { true, "RDPES1", dpp_rx_err }, - { true, "RDPES2", dpp_rx_err }, - { true, "RDPES3", dpp_rx_err }, - { true, "RDPES4", dpp_rx_err }, - { true, "RDPES5", dpp_rx_err }, - { true, "RDPES6", dpp_rx_err }, - { true, "RDPES7", dpp_rx_err }, - { true, "RDPES8", dpp_rx_err }, - { true, "RDPES9", dpp_rx_err }, - { true, "RDPES10", dpp_rx_err }, - { true, "RDPES11", dpp_rx_err }, - { true, "RDPES12", dpp_rx_err }, - { true, "RDPES13", dpp_rx_err }, - { true, "RDPES14", dpp_rx_err }, - { true, "RDPES15", dpp_rx_err }, + { true, "TDPES0", DPP_TX_ERR }, + { true, "TDPES1", DPP_TX_ERR }, + { true, "TDPES2", DPP_TX_ERR }, + { true, "TDPES3", DPP_TX_ERR }, + { true, "TDPES4", DPP_TX_ERR }, + { true, "TDPES5", DPP_TX_ERR }, + { true, "TDPES6", DPP_TX_ERR }, + { true, "TDPES7", DPP_TX_ERR }, + { true, "TDPES8", DPP_TX_ERR }, + { true, "TDPES9", DPP_TX_ERR }, + { true, "TDPES10", DPP_TX_ERR }, + { true, "TDPES11", DPP_TX_ERR }, + { true, "TDPES12", DPP_TX_ERR }, + { true, "TDPES13", DPP_TX_ERR }, + { true, "TDPES14", DPP_TX_ERR }, + { true, "TDPES15", DPP_TX_ERR }, + { true, "RDPES0", DPP_RX_ERR }, + { true, "RDPES1", DPP_RX_ERR }, + { true, "RDPES2", DPP_RX_ERR }, + { true, "RDPES3", DPP_RX_ERR }, + { true, "RDPES4", DPP_RX_ERR }, + { true, "RDPES5", DPP_RX_ERR }, + { true, "RDPES6", DPP_RX_ERR }, + { true, "RDPES7", DPP_RX_ERR }, + { true, "RDPES8", DPP_RX_ERR }, + { true, "RDPES9", DPP_RX_ERR }, + { true, "RDPES10", DPP_RX_ERR }, + { true, "RDPES11", DPP_RX_ERR }, + { true, "RDPES12", DPP_RX_ERR }, + { true, "RDPES13", DPP_RX_ERR }, + { true, "RDPES14", DPP_RX_ERR }, + { true, "RDPES15", DPP_RX_ERR }, }; static void dwxgmac3_handle_dma_err(struct net_device *ndev, -- GitLab From 2f74258d997c8b93627041d94daa265b5f0d1b4d Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 9 Feb 2024 16:13:07 -0800 Subject: [PATCH 0985/1405] ionic: minimal work with 0 budget We should be doing as little as possible besides freeing Tx space when our napi routines are called with budget of 0, so jump out before doing anything besides Tx cleaning. See commit afbed3f74830 ("net/mlx5e: do as little as possible in napi poll when budget is 0") for more info. Fixes: fe8c30b50835 ("ionic: separate interrupt for Tx and Rx") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20240210001307.48450-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 54cd96b035d68..6f47767598637 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -579,6 +579,9 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) work_done = ionic_cq_service(cq, budget, ionic_tx_service, NULL, NULL); + if (unlikely(!budget)) + return budget; + if (work_done < budget && napi_complete_done(napi, work_done)) { ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; @@ -607,6 +610,9 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) u32 work_done = 0; u32 flags = 0; + if (unlikely(!budget)) + return budget; + lif = cq->bound_q->lif; idev = &lif->ionic->idev; @@ -656,6 +662,9 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, ionic_tx_service, NULL, NULL); + if (unlikely(!budget)) + return budget; + rx_work_done = ionic_cq_service(rxcq, budget, ionic_rx_service, NULL, NULL); -- GitLab From 3e36031cc0540ca97b615cbb940331892cbd3d21 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 9 Feb 2024 16:20:02 -0800 Subject: [PATCH 0986/1405] pds_core: no health-thread in VF path The VFs don't run the health thread, so don't try to stop or restart the non-existent timer or work item. Fixes: d9407ff11809 ("pds_core: Prevent health thread from running during reset/remove") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20240210002002.49483-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index cdbf053b5376c..0050c5894563b 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -451,6 +451,9 @@ static void pdsc_remove(struct pci_dev *pdev) static void pdsc_stop_health_thread(struct pdsc *pdsc) { + if (pdsc->pdev->is_virtfn) + return; + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->health_work.func) cancel_work_sync(&pdsc->health_work); @@ -458,6 +461,9 @@ static void pdsc_stop_health_thread(struct pdsc *pdsc) static void pdsc_restart_health_thread(struct pdsc *pdsc) { + if (pdsc->pdev->is_virtfn) + return; + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); mod_timer(&pdsc->wdtimer, jiffies + 1); } -- GitLab From 9f30831390ede02d9fcd54fd9ea5a585ab649f4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 9 Feb 2024 18:12:48 +0000 Subject: [PATCH 0987/1405] net: add rcu safety to rtnl_prop_list_size() rtnl_prop_list_size() can be called while alternative names are added or removed concurrently. if_nlmsg_size() / rtnl_calcit() can indeed be called without RTNL held. Use explicit RCU protection to avoid UAF. Fixes: 88f4fb0c7496 ("net: rtnetlink: put alternative names to getlink message") Signed-off-by: Eric Dumazet Cc: Jiri Pirko Link: https://lore.kernel.org/r/20240209181248.96637-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- net/core/rtnetlink.c | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9bb792cecc16f..73a0219730075 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -336,7 +336,7 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name) return -ENOMEM; netdev_name_node_add(net, name_node); /* The node that holds dev->name acts as a head of per-device list. */ - list_add_tail(&name_node->list, &dev->name_node->list); + list_add_tail_rcu(&name_node->list, &dev->name_node->list); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f6f29eb03ec27..9c4f427f3a505 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1020,14 +1020,17 @@ static size_t rtnl_xdp_size(void) static size_t rtnl_prop_list_size(const struct net_device *dev) { struct netdev_name_node *name_node; - size_t size; + unsigned int cnt = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(name_node, &dev->name_node->list, list) + cnt++; + rcu_read_unlock(); - if (list_empty(&dev->name_node->list)) + if (!cnt) return 0; - size = nla_total_size(0); - list_for_each_entry(name_node, &dev->name_node->list, list) - size += nla_total_size(ALTIFNAMSIZ); - return size; + + return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ); } static size_t rtnl_proto_down_size(const struct net_device *dev) -- GitLab From 4cbec7e89a416294c46e71c967b57b9119fe0054 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 9 Feb 2024 15:52:47 -0600 Subject: [PATCH 0988/1405] scsi: target: Fix unmap setup during configuration This issue was found and also debugged by Carl Lei . If the device is not enabled, iblock/file will have not setup their se_device to bdev/file mappings. If a user tries to config the unmap settings at this time, we will then crash trying to access a NULL pointer where the bdev/file should be. This patch adds a check to make sure the device is configured before we try to call the configure_unmap callout. Fixes: 34bd1dcacf0d ("scsi: target: Detect UNMAP support post configuration") Reported-by: Carl Lei Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20240209215247.5213-1-michael.christie@oracle.com Reviewed-by: Maurizio Lombardi Signed-off-by: Martin K. Petersen --- drivers/target/target_core_configfs.c | 48 ++++++++++++++++++--------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index a5f58988130a1..c1fbcdd161826 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -759,6 +759,29 @@ static ssize_t emulate_tas_store(struct config_item *item, return count; } +static int target_try_configure_unmap(struct se_device *dev, + const char *config_opt) +{ + if (!dev->transport->configure_unmap) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + if (!target_dev_configured(dev)) { + pr_err("Generic Block Discard setup for %s requires device to be configured\n", + config_opt); + return -ENODEV; + } + + if (!dev->transport->configure_unmap(dev)) { + pr_err("Generic Block Discard setup for %s failed\n", + config_opt); + return -ENOSYS; + } + + return 0; +} + static ssize_t emulate_tpu_store(struct config_item *item, const char *page, size_t count) { @@ -776,11 +799,9 @@ static ssize_t emulate_tpu_store(struct config_item *item, * Discard supported is detected iblock_create_virtdevice(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "emulate_tpu"); + if (ret) + return ret; } da->emulate_tpu = flag; @@ -806,11 +827,9 @@ static ssize_t emulate_tpws_store(struct config_item *item, * Discard supported is detected iblock_create_virtdevice(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "emulate_tpws"); + if (ret) + return ret; } da->emulate_tpws = flag; @@ -1022,12 +1041,9 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item, * Discard supported is detected iblock_configure_device(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("dev[%p]: Thin Provisioning LBPRZ will not be set because max_unmap_block_desc_count is zero\n", - da->da_dev); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "unmap_zeroes_data"); + if (ret) + return ret; } da->unmap_zeroes_data = flag; pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n", -- GitLab From 977fe773dcc7098d8eaf4ee6382cb51e13e784cb Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 9 Feb 2024 10:07:34 -0800 Subject: [PATCH 0989/1405] scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock" This reverts commit 1a1975551943f681772720f639ff42fbaa746212. This commit causes interrupts to be lost for FCoE devices, since it changed sping locks from "bh" to "irqsave". Instead, a work queue should be used, and will be addressed in a separate commit. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Signed-off-by: Lee Duncan Link: https://lore.kernel.org/r/c578cdcd46b60470535c4c4a953e6a1feca0dffd.1707500786.git.lduncan@suse.com Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/fcoe/fcoe_ctlr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 19eee108db021..5c8d1ba3f8f3c 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** -- GitLab From 379a58caa19930e010b7efa1c1f3b9411d3d2ca3 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 9 Feb 2024 10:07:35 -0800 Subject: [PATCH 0990/1405] scsi: fnic: Move fnic_fnic_flush_tx() to a work queue Rather than call 'fnic_flush_tx()' from interrupt context we should be moving it onto a work queue to avoid any locking issues. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Co-developed-by: Hannes Reinecke Signed-off-by: Hannes Reinecke Signed-off-by: Lee Duncan Link: https://lore.kernel.org/r/ce5ffa5d0ff82c2b2e283b3b4bff23291d49b05c.1707500786.git.lduncan@suse.com Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/fnic.h | 3 ++- drivers/scsi/fnic/fnic_fcs.c | 5 +++-- drivers/scsi/fnic/fnic_main.c | 1 + drivers/scsi/fnic/fnic_scsi.c | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 2074937c05bc8..ce73f08ee889f 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -305,6 +305,7 @@ struct fnic { unsigned int copy_wq_base; struct work_struct link_work; struct work_struct frame_work; + struct work_struct flush_work; struct sk_buff_head frame_queue; struct sk_buff_head tx_queue; @@ -363,7 +364,7 @@ void fnic_handle_event(struct work_struct *work); int fnic_rq_cmpl_handler(struct fnic *fnic, int); int fnic_alloc_rq_frame(struct vnic_rq *rq); void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); -void fnic_flush_tx(struct fnic *); +void fnic_flush_tx(struct work_struct *work); void fnic_eth_send(struct fcoe_ctlr *, struct sk_buff *skb); void fnic_set_port_id(struct fc_lport *, u32, struct fc_frame *); void fnic_update_mac(struct fc_lport *, u8 *new); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 5e312a55cc7da..a08293b2ad9f5 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -1182,7 +1182,7 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp) /** * fnic_flush_tx() - send queued frames. - * @fnic: fnic device + * @work: pointer to work element * * Send frames that were waiting to go out in FC or Ethernet mode. * Whenever changing modes we purge queued frames, so these frames should @@ -1190,8 +1190,9 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp) * * Called without fnic_lock held. */ -void fnic_flush_tx(struct fnic *fnic) +void fnic_flush_tx(struct work_struct *work) { + struct fnic *fnic = container_of(work, struct fnic, flush_work); struct sk_buff *skb; struct fc_frame *fp; diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 5ed1d897311a8..29eead383eb9a 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -830,6 +830,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&fnic->vlans_lock); INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame); INIT_WORK(&fnic->event_work, fnic_handle_event); + INIT_WORK(&fnic->flush_work, fnic_flush_tx); skb_queue_head_init(&fnic->fip_frame_queue); INIT_LIST_HEAD(&fnic->evlist); INIT_LIST_HEAD(&fnic->vlans); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 8d7fc5284293b..fc4cee91b175c 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -680,7 +680,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, spin_unlock_irqrestore(&fnic->fnic_lock, flags); - fnic_flush_tx(fnic); + queue_work(fnic_event_queue, &fnic->flush_work); reset_cmpl_handler_end: fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); @@ -736,7 +736,7 @@ static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic, } spin_unlock_irqrestore(&fnic->fnic_lock, flags); - fnic_flush_tx(fnic); + queue_work(fnic_event_queue, &fnic->flush_work); queue_work(fnic_event_queue, &fnic->frame_work); } else { spin_unlock_irqrestore(&fnic->fnic_lock, flags); -- GitLab From b2c52b8c128ea07f1632c516cec0d72cb63b5599 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 28 Jan 2024 17:50:43 +0100 Subject: [PATCH 0991/1405] xen/privcmd: Use memdup_array_user() in alloc_ioreq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * The function “memdup_array_user” was added with the commit 313ebe47d75558511aa1237b6e35c663b5c0ec6f ("string.h: add array-wrappers for (v)memdup_user()"). Thus use it accordingly. This issue was detected by using the Coccinelle software. * Delete a label which became unnecessary with this refactoring. Signed-off-by: Markus Elfring Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/41e333f7-1f3a-41b6-a121-a3c0ae54e36f@web.de Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 35b6e306026a4..67dfa47788649 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1223,18 +1223,13 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd) kioreq->ioreq = (struct ioreq *)(page_to_virt(pages[0])); mmap_write_unlock(mm); - size = sizeof(*ports) * kioreq->vcpus; - ports = kzalloc(size, GFP_KERNEL); - if (!ports) { - ret = -ENOMEM; + ports = memdup_array_user(u64_to_user_ptr(ioeventfd->ports), + kioreq->vcpus, sizeof(*ports)); + if (IS_ERR(ports)) { + ret = PTR_ERR(ports); goto error_kfree; } - if (copy_from_user(ports, u64_to_user_ptr(ioeventfd->ports), size)) { - ret = -EFAULT; - goto error_kfree_ports; - } - for (i = 0; i < kioreq->vcpus; i++) { kioreq->ports[i].vcpu = i; kioreq->ports[i].port = ports[i]; @@ -1256,7 +1251,7 @@ struct privcmd_kernel_ioreq *alloc_ioreq(struct privcmd_ioeventfd *ioeventfd) error_unbind: while (--i >= 0) unbind_from_irqhandler(irq_from_evtchn(ports[i]), &kioreq->ports[i]); -error_kfree_ports: + kfree(ports); error_kfree: kfree(kioreq); -- GitLab From 2528dcbea94496f080aad9311327baedc11831d7 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 3 Feb 2024 15:53:37 -0300 Subject: [PATCH 0992/1405] xen: pcpu: make xen_pcpu_subsys const Now that the driver core can properly handle constant struct bus_type, move the xen_pcpu_subsys variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240203-bus_cleanup-xen-v1-1-c2f5fe89ed95@marliere.net Signed-off-by: Juergen Gross --- drivers/xen/pcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 5086552731453..c63f317e3df3d 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -65,7 +65,7 @@ struct pcpu { uint32_t flags; }; -static struct bus_type xen_pcpu_subsys = { +static const struct bus_type xen_pcpu_subsys = { .name = "xen_cpu", .dev_name = "xen_cpu", }; -- GitLab From b0f2f82c9c16427722b8782371c2efe82b1f815c Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 3 Feb 2024 15:53:38 -0300 Subject: [PATCH 0993/1405] xen: balloon: make balloon_subsys const Now that the driver core can properly handle constant struct bus_type, move the balloon_subsys variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240203-bus_cleanup-xen-v1-2-c2f5fe89ed95@marliere.net Signed-off-by: Juergen Gross --- drivers/xen/xen-balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 8cd583db20b17..b293d7652f155 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -237,7 +237,7 @@ static const struct attribute_group *balloon_groups[] = { NULL }; -static struct bus_type balloon_subsys = { +static const struct bus_type balloon_subsys = { .name = BALLOON_CLASS_NAME, .dev_name = BALLOON_CLASS_NAME, }; -- GitLab From bf5802238dc181b1f7375d358af1d01cd72d1c11 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Feb 2024 09:03:24 -0800 Subject: [PATCH 0994/1405] xen/gntalloc: Replace UAPI 1-element array Without changing the structure size (since it is UAPI), add a proper flexible array member, and reference it in the kernel so that it will not be trip the array-bounds sanitizer[1]. Link: https://github.com/KSPP/linux/issues/113 [1] Cc: Juergen Gross Cc: Stefano Stabellini Cc: Oleksandr Tyshchenko Cc: Gustavo A. R. Silva Cc: xen-devel@lists.xenproject.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240206170320.work.437-kees@kernel.org Signed-off-by: Juergen Gross --- drivers/xen/gntalloc.c | 2 +- include/uapi/xen/gntalloc.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 26ffb8755ffb5..f93f73ecefeee 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -317,7 +317,7 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, rc = -EFAULT; goto out_free; } - if (copy_to_user(arg->gref_ids, gref_ids, + if (copy_to_user(arg->gref_ids_flex, gref_ids, sizeof(gref_ids[0]) * op.count)) { rc = -EFAULT; goto out_free; diff --git a/include/uapi/xen/gntalloc.h b/include/uapi/xen/gntalloc.h index 48d2790ef928c..3109282672f33 100644 --- a/include/uapi/xen/gntalloc.h +++ b/include/uapi/xen/gntalloc.h @@ -31,7 +31,10 @@ struct ioctl_gntalloc_alloc_gref { __u64 index; /* The grant references of the newly created grant, one per page */ /* Variable size, depending on count */ - __u32 gref_ids[1]; + union { + __u32 gref_ids[1]; + __DECLARE_FLEX_ARRAY(__u32, gref_ids_flex); + }; }; #define GNTALLOC_FLAG_WRITABLE 1 -- GitLab From b0344d6854d25a8b3b901c778b1728885dd99007 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Fri, 9 Feb 2024 17:24:49 -0800 Subject: [PATCH 0995/1405] irqchip/irq-brcmstb-l2: Add write memory barrier before exit It was observed on Broadcom devices that use GIC v3 architecture L1 interrupt controllers as the parent of brcmstb-l2 interrupt controllers that the deactivation of the parent interrupt could happen before the brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the interrupt only to find that no L2 interrupt was pending. The result was a spurious interrupt invoking handle_bad_irq() with its associated messaging. While this did not create a functional problem it is a waste of cycles. The hazard exists because the memory mapped bus writes to the brcmstb-l2 registers are buffered and the GIC v3 architecture uses a very efficient system register write to deactivate the interrupt. Add a write memory barrier prior to invoking chained_irq_exit() to introduce a dsb(st) on those systems to ensure the system register write cannot be executed until the memory mapped writes are visible to the system. [ florian: Added Fixes tag ] Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Signed-off-by: Doug Berger Signed-off-by: Florian Fainelli Signed-off-by: Thomas Gleixner Acked-by: Florian Fainelli Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadcom.com --- drivers/irqchip/irq-brcmstb-l2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 5559c943f03f9..2b0b3175cea06 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } -- GitLab From fa765c4b4aed2d64266b694520ecb025c862c5a9 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Wed, 24 Jan 2024 16:31:28 +0000 Subject: [PATCH 0996/1405] xen/events: close evtchn after mapping cleanup shutdown_pirq and startup_pirq are not taking the irq_mapping_update_lock because they can't due to lock inversion. Both are called with the irq_desc->lock being taking. The lock order, however, is first irq_mapping_update_lock and then irq_desc->lock. This opens multiple races: - shutdown_pirq can be interrupted by a function that allocates an event channel: CPU0 CPU1 shutdown_pirq { xen_evtchn_close(e) __startup_pirq { EVTCHNOP_bind_pirq -> returns just freed evtchn e set_evtchn_to_irq(e, irq) } xen_irq_info_cleanup() { set_evtchn_to_irq(e, -1) } } Assume here event channel e refers here to the same event channel number. After this race the evtchn_to_irq mapping for e is invalid (-1). - __startup_pirq races with __unbind_from_irq in a similar way. Because __startup_pirq doesn't take irq_mapping_update_lock it can grab the evtchn that __unbind_from_irq is currently freeing and cleaning up. In this case even though the event channel is allocated, its mapping can be unset in evtchn_to_irq. The fix is to first cleanup the mappings and then close the event channel. In this way, when an event channel gets allocated it's potential previous evtchn_to_irq mappings are guaranteed to be unset already. This is also the reverse order of the allocation where first the event channel is allocated and then the mappings are setup. On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal [un]bind interfaces"), we hit a BUG like the following during probing of NVMe devices. The issue is that during nvme_setup_io_queues, pci_free_irq is called for every device which results in a call to shutdown_pirq. With many nvme devices it's therefore likely to hit this race during boot because there will be multiple calls to shutdown_pirq and startup_pirq are running potentially in parallel. ------------[ cut here ]------------ blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled kernel BUG at drivers/xen/events/events_base.c:499! invalid opcode: 0000 [#1] SMP PTI CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1 Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006 Workqueue: nvme-reset-wq nvme_reset_work RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0 Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? show_trace_log_lvl+0x1c1/0x2d9 ? show_trace_log_lvl+0x1c1/0x2d9 ? set_affinity_irq+0xdc/0x1c0 ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0x90/0x110 ? bind_evtchn_to_cpu+0xdf/0xf0 ? do_error_trap+0x65/0x80 ? bind_evtchn_to_cpu+0xdf/0xf0 ? exc_invalid_op+0x4e/0x70 ? bind_evtchn_to_cpu+0xdf/0xf0 ? asm_exc_invalid_op+0x12/0x20 ? bind_evtchn_to_cpu+0xdf/0xf0 ? bind_evtchn_to_cpu+0xc5/0xf0 set_affinity_irq+0xdc/0x1c0 irq_do_set_affinity+0x1d7/0x1f0 irq_setup_affinity+0xd6/0x1a0 irq_startup+0x8a/0xf0 __setup_irq+0x639/0x6d0 ? nvme_suspend+0x150/0x150 request_threaded_irq+0x10c/0x180 ? nvme_suspend+0x150/0x150 pci_request_irq+0xa8/0xf0 ? __blk_mq_free_request+0x74/0xa0 queue_request_irq+0x6f/0x80 nvme_create_queue+0x1af/0x200 nvme_create_io_queues+0xbd/0xf0 nvme_setup_io_queues+0x246/0x320 ? nvme_irq_check+0x30/0x30 nvme_reset_work+0x1c8/0x400 process_one_work+0x1b0/0x350 worker_thread+0x49/0x310 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace a11715de1eee1873 ]--- Fixes: d46a78b05c0e ("xen: implement pirq type event channels") Cc: stable@vger.kernel.org Co-debugged-by: Andrew Panyakin Signed-off-by: Maximilian Heyne Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240124163130.31324-1-mheyne@amazon.de Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b8cfea7812d6b..3b9f080109d7e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_data *data) return; do_mask(info, EVT_MASK_REASON_EXPLICIT); - xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); + xen_evtchn_close(evtchn); } static void enable_pirq(struct irq_data *data) @@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(struct irq_info *info, unsigned int irq) { evtchn_port_t evtchn; + bool close_evtchn = false; if (!info) { xen_irq_free_desc(irq); @@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) struct xenbus_device *dev; if (!info->is_static) - xen_evtchn_close(evtchn); + close_evtchn = true; switch (info->type) { case IRQT_VIRQ: @@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq) } xen_irq_info_cleanup(info); + + if (close_evtchn) + xen_evtchn_close(evtchn); } xen_free_irq(info); -- GitLab From f1acf1ac84d2ae97b7889b87223c1064df850069 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Thu, 8 Feb 2024 19:28:54 -0700 Subject: [PATCH 0997/1405] net:rds: Fix possible deadlock in rds_message_put Functions rds_still_queued and rds_clear_recv_queue lock a given socket in order to safely iterate over the incoming rds messages. However calling rds_inc_put while under this lock creates a potential deadlock. rds_inc_put may eventually call rds_message_purge, which will lock m_rs_lock. This is the incorrect locking order since m_rs_lock is meant to be locked before the socket. To fix this, we move the message item to a local list or variable that wont need rs_recv_lock protection. Then we can safely call rds_inc_put on any item stored locally after rs_recv_lock is released. Fixes: bdbe6fbc6a2f ("RDS: recv.c") Reported-by: syzbot+f9db6ff27b9bfdcfeca0@syzkaller.appspotmail.com Reported-by: syzbot+dcd73ff9291e6d34b3ab@syzkaller.appspotmail.com Signed-off-by: Allison Henderson Link: https://lore.kernel.org/r/20240209022854.200292-1-allison.henderson@oracle.com Signed-off-by: Paolo Abeni --- net/rds/recv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/rds/recv.c b/net/rds/recv.c index c71b923764fd7..5627f80013f8b 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, struct sock *sk = rds_rs_to_sk(rs); int ret = 0; unsigned long flags; + struct rds_incoming *to_drop = NULL; write_lock_irqsave(&rs->rs_recv_lock, flags); if (!list_empty(&inc->i_item)) { @@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); list_del_init(&inc->i_item); - rds_inc_put(inc); + to_drop = inc; } } write_unlock_irqrestore(&rs->rs_recv_lock, flags); + if (to_drop) + rds_inc_put(to_drop); + rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); return ret; } @@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs) struct sock *sk = rds_rs_to_sk(rs); struct rds_incoming *inc, *tmp; unsigned long flags; + LIST_HEAD(to_drop); write_lock_irqsave(&rs->rs_recv_lock, flags); list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, -be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); + list_move(&inc->i_item, &to_drop); + } + write_unlock_irqrestore(&rs->rs_recv_lock, flags); + + list_for_each_entry_safe(inc, tmp, &to_drop, i_item) { list_del_init(&inc->i_item); rds_inc_put(inc); } - write_unlock_irqrestore(&rs->rs_recv_lock, flags); } /* -- GitLab From f1c2765c6afcd1f71f76ed8c9bf94acedab4cecb Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 30 Jan 2024 16:27:20 +0800 Subject: [PATCH 0998/1405] irqchip/loongson-eiointc: Use correct struct type in eiointc_domain_alloc() eiointc_domain_alloc() uses struct eiointc, which is not defined, for a pointer. Older compilers treat that as a forward declaration and due to assignment of a void pointer there is no warning emitted. As the variable is then handed in as a void pointer argument to irq_domain_set_info() the code is functional. Use struct eiointc_priv instead. [ tglx: Rewrote changelog ] Fixes: dd281e1a1a93 ("irqchip: Add Loongson Extended I/O interrupt controller support") Signed-off-by: Bibo Mao Signed-off-by: Thomas Gleixner Acked-by: Huacai Chen Link: https://lore.kernel.org/r/20240130082722.2912576-2-maobibo@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 1623cd7791752..b3736bdd4b9f2 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -241,7 +241,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) -- GitLab From 8ad032cc8c499af6f3289c796f411e8874b50fdb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 1 Feb 2024 15:17:50 +0300 Subject: [PATCH 0999/1405] irqchip/qcom-mpm: Fix IS_ERR() vs NULL check in qcom_mpm_init() devm_ioremap() doesn't return error pointers, it returns NULL on error. Update the check accordingly. Fixes: 221b110d87c2 ("irqchip/qcom-mpm: Support passing a slice of SRAM as reg space") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/22e1f4de-edce-4791-bd2d-2b2e98529492@moroto.mountain --- drivers/irqchip/irq-qcom-mpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index cda5838d2232d..7942d8eb3d00e 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -389,8 +389,8 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent) /* Don't use devm_ioremap_resource, as we're accessing a shared region. */ priv->base = devm_ioremap(dev, res.start, resource_size(&res)); of_node_put(msgram_np); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); + if (!priv->base) + return -ENOMEM; } else { /* Otherwise, fall back to simple MMIO. */ priv->base = devm_platform_ioremap_resource(pdev, 0); -- GitLab From 6ac86372102b477083db99a9af8246fb916271b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:15:59 +0100 Subject: [PATCH 1000/1405] gpiolib: add gpiod_to_gpio_device() stub for !GPIOLIB Add empty stub of gpiod_to_gpio_device() when GPIOLIB is not enabled. Cc: Fixes: 370232d096e3 ("gpiolib: provide gpiod_to_gpio_device()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9a5c6c76e6533..012797e7106d8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -819,6 +819,12 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- GitLab From ebe0c15b135b1e4092c25b95d89e9a5899467499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:00 +0100 Subject: [PATCH 1001/1405] gpiolib: add gpio_device_get_base() stub for !GPIOLIB Add empty stub of gpio_device_get_base() when GPIOLIB is not enabled. Cc: Fixes: 8c85a102fc4e ("gpiolib: provide gpio_device_get_base()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 012797e7106d8..c1df7698edb03 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -825,6 +825,12 @@ static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- GitLab From 2df8aa3cad407044f2febdbbdf220c6dae839c79 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 25 Jan 2024 09:16:01 +0100 Subject: [PATCH 1002/1405] gpiolib: add gpio_device_get_label() stub for !GPIOLIB Add empty stub of gpio_device_get_label() when GPIOLIB is not enabled. Cc: Fixes: d1f7728259ef ("gpiolib: provide gpio_device_get_label()") Suggested-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c1df7698edb03..7f75c9a518741 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -831,6 +831,12 @@ static inline int gpio_device_get_base(struct gpio_device *gdev) return -ENODEV; } +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- GitLab From 8929f95b2b587791a7dcd04cc91520194a76d3a6 Mon Sep 17 00:00:00 2001 From: Keqi Wang Date: Fri, 9 Feb 2024 17:16:59 +0800 Subject: [PATCH 1003/1405] connector/cn_proc: revert "connector: Fix proc_event_num_listeners count not cleared" This reverts commit c46bfba1337d ("connector: Fix proc_event_num_listeners count not cleared"). It is not accurate to reset proc_event_num_listeners according to cn_netlink_send_mult() return value -ESRCH. In the case of stress-ng netlink-proc, -ESRCH will always be returned, because netlink_broadcast_filtered will return -ESRCH, which may cause stress-ng netlink-proc performance degradation. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202401112259.b23a1567-oliver.sang@intel.com Fixes: c46bfba1337d ("connector: Fix proc_event_num_listeners count not cleared") Signed-off-by: Keqi Wang Link: https://lore.kernel.org/r/20240209091659.68723-1-wangkeqi_chris@163.com Signed-off-by: Paolo Abeni --- drivers/connector/cn_proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 3d5e6d705fc6e..44b19e6961763 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -108,9 +108,8 @@ static inline void send_msg(struct cn_msg *msg) filter_data[1] = 0; } - if (cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, - cn_filter, (void *)filter_data) == -ESRCH) - atomic_set(&proc_event_num_listeners, 0); + cn_netlink_send_mult(msg, msg->len, 0, CN_IDX_PROC, GFP_NOWAIT, + cn_filter, (void *)filter_data); local_unlock(&local_event.lock); } -- GitLab From 902d6d013f75b68f31d208c6f3ff9cdca82648a7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 13 Sep 2023 16:18:34 +0800 Subject: [PATCH 1004/1405] ceph: always queue a writeback when revoking the Fb caps In case there is 'Fw' dirty caps and 'CHECK_CAPS_FLUSH' is set we will always ignore queue a writeback. Queue a writeback is very important because it will block kclient flushing the snapcaps to MDS and which will block MDS waiting for revoking the 'Fb' caps. Link: https://tracker.ceph.com/issues/50223 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ad1f46c66fbff..bce3a840f15c2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2156,6 +2156,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); + /* completed revocation? going down and there are no caps? */ + if (revoking) { + if ((revoking & cap_used) == 0) { + doutc(cl, "completed revocation of %s\n", + ceph_cap_string(cap->implemented & ~cap->issued)); + goto ack; + } + + /* + * If the "i_wrbuffer_ref" was increased by mmap or generic + * cache write just before the ceph_check_caps() is called, + * the Fb capability revoking will fail this time. Then we + * must wait for the BDI's delayed work to flush the dirty + * pages and to release the "i_wrbuffer_ref", which will cost + * at most 5 seconds. That means the MDS needs to wait at + * most 5 seconds to finished the Fb capability's revocation. + * + * Let's queue a writeback for it. + */ + if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && + (revoking & CEPH_CAP_FILE_BUFFER)) + queue_writeback = true; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -2183,30 +2207,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags) } } - /* completed revocation? going down and there are no caps? */ - if (revoking) { - if ((revoking & cap_used) == 0) { - doutc(cl, "completed revocation of %s\n", - ceph_cap_string(cap->implemented & ~cap->issued)); - goto ack; - } - - /* - * If the "i_wrbuffer_ref" was increased by mmap or generic - * cache write just before the ceph_check_caps() is called, - * the Fb capability revoking will fail this time. Then we - * must wait for the BDI's delayed work to flush the dirty - * pages and to release the "i_wrbuffer_ref", which will cost - * at most 5 seconds. That means the MDS needs to wait at - * most 5 seconds to finished the Fb capability's revocation. - * - * Let's queue a writeback for it. - */ - if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && - (revoking & CEPH_CAP_FILE_BUFFER)) - queue_writeback = true; - } - /* want more caps from mds? */ if (want & ~cap->mds_wanted) { if (want & ~(cap->mds_wanted | cap->issued)) -- GitLab From dbc347ef7f0c53aa4a5383238a804d7ebbb0b5ca Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 14 Sep 2023 10:29:16 +0800 Subject: [PATCH 1005/1405] ceph: add ceph_cap_unlink_work to fire check_caps() immediately When unlinking a file the check caps could be delayed for more than 5 seconds, but in MDS side it maybe waiting for the clients to release caps. This will use the cap_wq work queue and a dedicated list to help fire the check_caps() and dirty buffer flushing immediately. Link: https://tracker.ceph.com/issues/50223 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 17 +++++++++++++++- fs/ceph/mds_client.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/mds_client.h | 5 +++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bce3a840f15c2..7fb4aae974124 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4772,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode) if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; - __cap_delay_requeue_front(mdsc, ci); + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, + ceph_vinop(inode)); + spin_lock(&mdsc->cap_unlink_delay_lock); + ci->i_ceph_flags |= CEPH_I_FLUSH; + if (!list_empty(&ci->i_cap_delay_list)) + list_del_init(&ci->i_cap_delay_list); + list_add_tail(&ci->i_cap_delay_list, + &mdsc->cap_unlink_delay_list); + spin_unlock(&mdsc->cap_unlink_delay_lock); + + /* + * Fire the work immediately, because the MDS maybe + * waiting for caps release. + */ + ceph_queue_cap_unlink_work(mdsc); } } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f71bb9c9569fc..3ab9c268a8bb3 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) } } +void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc) +{ + struct ceph_client *cl = mdsc->fsc->client; + if (mdsc->stopping) + return; + + if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) { + doutc(cl, "caps unlink work queued\n"); + } else { + doutc(cl, "failed to queue caps unlink work\n"); + } +} + +static void ceph_cap_unlink_work(struct work_struct *work) +{ + struct ceph_mds_client *mdsc = + container_of(work, struct ceph_mds_client, cap_unlink_work); + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "begin\n"); + spin_lock(&mdsc->cap_unlink_delay_lock); + while (!list_empty(&mdsc->cap_unlink_delay_list)) { + struct ceph_inode_info *ci; + struct inode *inode; + + ci = list_first_entry(&mdsc->cap_unlink_delay_list, + struct ceph_inode_info, + i_cap_delay_list); + list_del_init(&ci->i_cap_delay_list); + + inode = igrab(&ci->netfs.inode); + if (inode) { + spin_unlock(&mdsc->cap_unlink_delay_lock); + doutc(cl, "on %p %llx.%llx\n", inode, + ceph_vinop(inode)); + ceph_check_caps(ci, CHECK_CAPS_FLUSH); + iput(inode); + spin_lock(&mdsc->cap_unlink_delay_lock); + } + } + spin_unlock(&mdsc->cap_unlink_delay_lock); + doutc(cl, "done\n"); +} + /* * requests */ @@ -5359,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_delay_list); INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); + INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); + spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; @@ -5367,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) spin_lock_init(&mdsc->cap_dirty_lock); init_waitqueue_head(&mdsc->cap_flushing_wq); INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work); + INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work); err = ceph_metric_init(&mdsc->metric); if (err) goto err_mdsmap; @@ -5640,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) ceph_cleanup_global_and_empty_realms(mdsc); cancel_work_sync(&mdsc->cap_reclaim_work); + cancel_work_sync(&mdsc->cap_unlink_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ doutc(cl, "done\n"); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 40560af388272..03f8ff00874f7 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -462,6 +462,8 @@ struct ceph_mds_client { unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ spinlock_t cap_delay_lock; /* protects cap_delay_list */ + struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ + spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; @@ -475,6 +477,8 @@ struct ceph_mds_client { struct work_struct cap_reclaim_work; atomic_t cap_reclaim_pending; + struct work_struct cap_unlink_work; + /* * Cap reservations * @@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); +extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); extern int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg); -- GitLab From 1741a8269e1c51fa08d4bfdf34667387a6eb10ec Mon Sep 17 00:00:00 2001 From: Manuel Fombuena Date: Sun, 11 Feb 2024 19:04:29 +0000 Subject: [PATCH 1006/1405] HID: multitouch: Add required quirk for Synaptics 0xcddc device Add support for the pointing stick (Accupoint) and 2 mouse buttons. Present on some Toshiba/dynabook Portege X30 and X40 laptops. It should close https://bugzilla.kernel.org/show_bug.cgi?id=205817 Signed-off-by: Manuel Fombuena Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index fd5b0637dad68..3e91e4d6ba6fa 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2151,6 +2151,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xcd7e) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xcddc) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, -- GitLab From 846297e11e8ae428f8b00156a0cfe2db58100702 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2024 10:12:04 +0000 Subject: [PATCH 1007/1405] irqchip/gic-v3-its: Handle non-coherent GICv4 redistributors Although the GICv3 code base has gained some handling of systems failing to handle the shareability attributes, the GICv4 side of things has been firmly ignored. This is unfortunate, as the new recent addition of the "dma-noncoherent" is supposed to apply to all of the GICR tables, and not just the ones that are common to v3 and v4. Add some checks to handle the VPROPBASE/VPENDBASE shareability and cacheability attributes in the same way we deal with the other GICR_BASE registers, wrapping the flag check in a helper for improved readability. Note that this has been found by inspection only, as I don't have access to HW that suffers from this particular issue. Fixes: 3a0fff0fb6a3 ("irqchip/gic-v3: Enable non-coherent redistributors/ITSes DT probing") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20240213101206.2137483-2-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 37 +++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d097001c1e3ee..fec1b58470df0 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -207,6 +207,11 @@ static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its) return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]); } +static bool rdists_support_shareable(void) +{ + return !(gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE); +} + static u16 get_its_list(struct its_vm *vm) { struct its_node *its; @@ -2710,10 +2715,12 @@ static u64 inherit_vpe_l1_table_from_its(void) break; } val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12); - val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, - FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); - val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, - FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); + if (rdists_support_shareable()) { + val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, + FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); + val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, + FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); + } val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1); return val; @@ -2936,8 +2943,10 @@ static int allocate_vpe_l1_table(void) WARN_ON(!IS_ALIGNED(pa, psz)); val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12); - val |= GICR_VPROPBASER_RaWb; - val |= GICR_VPROPBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + } val |= GICR_VPROPBASER_4_1_Z; val |= GICR_VPROPBASER_4_1_VALID; @@ -3126,7 +3135,7 @@ static void its_cpu_init_lpis(void) gicr_write_propbaser(val, rbase + GICR_PROPBASER); tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); - if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE) + if (!rdists_support_shareable()) tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK; if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { @@ -3153,7 +3162,7 @@ static void its_cpu_init_lpis(void) gicr_write_pendbaser(val, rbase + GICR_PENDBASER); tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); - if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE) + if (!rdists_support_shareable()) tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK; if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { @@ -3880,14 +3889,18 @@ static void its_vpe_schedule(struct its_vpe *vpe) val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) & GENMASK_ULL(51, 12); val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; - val |= GICR_VPROPBASER_RaWb; - val |= GICR_VPROPBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + } gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); val = virt_to_phys(page_address(vpe->vpt_page)) & GENMASK_ULL(51, 16); - val |= GICR_VPENDBASER_RaWaWb; - val |= GICR_VPENDBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPENDBASER_RaWaWb; + val |= GICR_VPENDBASER_InnerShareable; + } /* * There is no good way of finding out if the pending table is * empty as we can race against the doorbell interrupt very -- GitLab From 8b02da04ad978827e5ccd675acf170198f747a7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2024 10:12:05 +0000 Subject: [PATCH 1008/1405] irqchip/gic-v3-its: Restore quirk probing for ACPI-based systems While refactoring the way the ITSs are probed, the handling of quirks applicable to ACPI-based platforms was lost. As a result, systems such as HIP07 lose their GICv4 functionnality, and some other may even fail to boot, unless they are configured to boot with DT. Move the enabling of quirks into its_probe_one(), making it common to all firmware implementations. Fixes: 9585a495ac93 ("irqchip/gic-v3-its: Split allocation from initialisation of its_node") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Lorenzo Pieralisi Reviewed-by: Zenghui Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-3-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fec1b58470df0..250b4562f308a 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -5091,6 +5091,8 @@ static int __init its_probe_one(struct its_node *its) u32 ctlr; int err; + its_enable_quirks(its); + if (is_v4(its)) { if (!(its->typer & GITS_TYPER_VMOVP)) { err = its_compute_its_list_map(its); @@ -5442,7 +5444,6 @@ static int __init its_of_probe(struct device_node *node) if (!its) return -ENOMEM; - its_enable_quirks(its); err = its_probe_one(its); if (err) { its_node_destroy(its); -- GitLab From af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2024 10:12:06 +0000 Subject: [PATCH 1009/1405] irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update When updating the affinity of a VPE, the VMOVP command is currently skipped if the two CPUs are part of the same VPE affinity. But this is wrong, as the doorbell corresponding to this VPE is still delivered on the 'old' CPU, which screws up the balancing. Furthermore, offlining that 'old' CPU results in doorbell interrupts generated for this VPE being discarded. The harsh reality is that VMOVP cannot be elided when a set_affinity() request occurs. It needs to be obeyed, and if an optimisation is to be made, it is at the point where the affinity change request is made (such as in KVM). Drop the VMOVP elision altogether, and only use the vpe_table_mask to try and stay within the same ITS affinity group if at all possible. Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP) Reported-by: Kunkun Jiang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-4-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 250b4562f308a..53abd4779914d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3826,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3843,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); - if (from == cpu) - goto out; - - vpe->col_idx = cpu; + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + + if (from == cpu) goto out; + vpe->col_idx = cpu; + its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); -- GitLab From bdab6c94bb24758081625e619330b04cfd56570a Mon Sep 17 00:00:00 2001 From: Even Xu Date: Fri, 9 Feb 2024 14:52:32 +0800 Subject: [PATCH 1010/1405] HID: Intel-ish-hid: Ishtp: Fix sensor reads after ACPI S3 suspend After legacy suspend/resume via ACPI S3, sensor read operation fails with timeout. Also, it will cause delay in resume operation as there will be retries on failure. This is caused by commit f645a90e8ff7 ("HID: intel-ish-hid: ishtp-hid-client: use helper functions for connection"), which used helper functions to simplify connect, reset and disconnect process. Also avoid freeing and allocating client buffers again during reconnect process. But there is a case, when ISH firmware resets after ACPI S3 suspend, ishtp bus driver frees client buffers. Since there is no realloc again during reconnect, there are no client buffers available to send connection requests to the firmware. Without successful connection to the firmware, subsequent sensor reads will timeout. To address this issue, ishtp bus driver does not free client buffers on warm reset after S3 resume. Simply add the buffers from the read list to free list of buffers. Fixes: f645a90e8ff7 ("HID: intel-ish-hid: ishtp-hid-client: use helper functions for connection") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218442 Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/bus.c | 2 ++ drivers/hid/intel-ish-hid/ishtp/client.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index aa6cb033bb06b..03d5601ce807b 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -722,6 +722,8 @@ void ishtp_bus_remove_all_clients(struct ishtp_device *ishtp_dev, spin_lock_irqsave(&ishtp_dev->cl_list_lock, flags); list_for_each_entry(cl, &ishtp_dev->cl_list, link) { cl->state = ISHTP_CL_DISCONNECTED; + if (warm_reset && cl->device->reference_count) + continue; /* * Wake any pending process. The waiter would check dev->state diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c index 82c907f01bd3b..8a7f2f6a4f868 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client.c +++ b/drivers/hid/intel-ish-hid/ishtp/client.c @@ -49,7 +49,9 @@ static void ishtp_read_list_flush(struct ishtp_cl *cl) list_for_each_entry_safe(rb, next, &cl->dev->read_list.list, list) if (rb->cl && ishtp_cl_cmp_id(cl, rb->cl)) { list_del(&rb->list); - ishtp_io_rb_free(rb); + spin_lock(&cl->free_list_spinlock); + list_add_tail(&rb->list, &cl->free_rb_list.list); + spin_unlock(&cl->free_list_spinlock); } spin_unlock_irqrestore(&cl->dev->read_list_spinlock, flags); } -- GitLab From 25236c91b5ab4a26a56ba2e79b8060cf4e047839 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 9 Feb 2024 14:04:53 -0800 Subject: [PATCH 1011/1405] af_unix: Fix task hung while purging oob_skb in GC. syzbot reported a task hung; at the same time, GC was looping infinitely in list_for_each_entry_safe() for OOB skb. [0] syzbot demonstrated that the list_for_each_entry_safe() was not actually safe in this case. A single skb could have references for multiple sockets. If we free such a skb in the list_for_each_entry_safe(), the current and next sockets could be unlinked in a single iteration. unix_notinflight() uses list_del_init() to unlink the socket, so the prefetched next socket forms a loop itself and list_for_each_entry_safe() never stops. Here, we must use while() and make sure we always fetch the first socket. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 5065 Comm: syz-executor236 Not tainted 6.8.0-rc3-syzkaller-00136-g1f719a2f3fa6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:preempt_count arch/x86/include/asm/preempt.h:26 [inline] RIP: 0010:check_kcov_mode kernel/kcov.c:173 [inline] RIP: 0010:__sanitizer_cov_trace_pc+0xd/0x60 kernel/kcov.c:207 Code: cc cc cc cc 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 65 48 8b 14 25 40 c2 03 00 <65> 8b 05 b4 7c 78 7e a9 00 01 ff 00 48 8b 34 24 74 0f f6 c4 01 74 RSP: 0018:ffffc900033efa58 EFLAGS: 00000283 RAX: ffff88807b077800 RBX: ffff88807b077800 RCX: 1ffffffff27b1189 RDX: ffff88802a5a3b80 RSI: ffffffff8968488d RDI: ffff88807b077f70 RBP: ffffc900033efbb0 R08: 0000000000000001 R09: fffffbfff27a900c R10: ffffffff93d48067 R11: ffffffff8ae000eb R12: ffff88807b077800 R13: dffffc0000000000 R14: ffff88807b077e40 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000564f4fc1e3a8 CR3: 000000000d57a000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_gc+0x563/0x13b0 net/unix/garbage.c:319 unix_release_sock+0xa93/0xf80 net/unix/af_unix.c:683 unix_release+0x91/0xf0 net/unix/af_unix.c:1064 __sock_release+0xb0/0x270 net/socket.c:659 sock_close+0x1c/0x30 net/socket.c:1421 __fput+0x270/0xb80 fs/file_table.c:376 task_work_run+0x14f/0x250 kernel/task_work.c:180 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xa8a/0x2ad0 kernel/exit.c:871 do_group_exit+0xd4/0x2a0 kernel/exit.c:1020 __do_sys_exit_group kernel/exit.c:1031 [inline] __se_sys_exit_group kernel/exit.c:1029 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1029 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd5/0x270 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f9d6cbdac09 Code: Unable to access opcode bytes at 0x7f9d6cbdabdf. RSP: 002b:00007fff5952feb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9d6cbdac09 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 00007f9d6cc552b0 R08: ffffffffffffffb8 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 00007f9d6cc552b0 R13: 0000000000000000 R14: 00007f9d6cc55d00 R15: 00007f9d6cbabe70 Reported-by: syzbot+4fa4a2d1f5a5ee06f006@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4fa4a2d1f5a5ee06f006 Fixes: 1279f9d9dec2 ("af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240209220453.96053-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/unix/garbage.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8f63f0b4bf012..2ff7ddbaa782e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -315,10 +315,11 @@ void unix_gc(void) __skb_queue_purge(&hitlist); #if IS_ENABLED(CONFIG_AF_UNIX_OOB) - list_for_each_entry_safe(u, next, &gc_candidates, link) { - struct sk_buff *skb = u->oob_skb; + while (!list_empty(&gc_candidates)) { + u = list_entry(gc_candidates.next, struct unix_sock, link); + if (u->oob_skb) { + struct sk_buff *skb = u->oob_skb; - if (skb) { u->oob_skb = NULL; kfree_skb(skb); } -- GitLab From ab41a31dd5e2681803642b6d08590b61867840ec Mon Sep 17 00:00:00 2001 From: Tatsunosuke Tobita Date: Thu, 1 Feb 2024 13:40:55 +0900 Subject: [PATCH 1012/1405] HID: wacom: generic: Avoid reporting a serial of '0' to userspace The xf86-input-wacom driver does not treat '0' as a valid serial number and will drop any input report which contains an MSC_SERIAL = 0 event. The kernel driver already takes care to avoid sending any MSC_SERIAL event if the value of serial[0] == 0 (which is the case for devices that don't actually report a serial number), but this is not quite sufficient. Only the lower 32 bits of the serial get reported to userspace, so if this portion of the serial is zero then there can still be problems. This commit allows the driver to report either the lower 32 bits if they are non-zero or the upper 32 bits otherwise. Signed-off-by: Jason Gerecke Signed-off-by: Tatsunosuke Tobita Fixes: f85c9dc678a5 ("HID: wacom: generic: Support tool ID and additional tool types") CC: stable@vger.kernel.org # v4.10 Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index da8a01fedd394..fbe10fbc5769e 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2575,7 +2575,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev, wacom_wac->hid_data.tipswitch); input_report_key(input, wacom_wac->tool[0], sense); if (wacom_wac->serial[0]) { - input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]); + /* + * xf86-input-wacom does not accept a serial number + * of '0'. Report the low 32 bits if possible, but + * if they are zero, report the upper ones instead. + */ + __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu; + __u32 serial_hi = wacom_wac->serial[0] >> 32; + input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi)); input_report_abs(input, ABS_MISC, sense ? id : 0); } -- GitLab From b419c5e2d9787a98c1410dbe1d4a25906b96f000 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 Feb 2024 13:31:56 +0200 Subject: [PATCH 1013/1405] Revert "iommu/arm-smmu: Convert to domain_alloc_paging()" This reverts commit 9b3febc3a3da ("iommu/arm-smmu: Convert to domain_alloc_paging()"). It breaks Qualcomm MSM8996 platform. Calling arm_smmu_write_context_bank() from new codepath results in the platform being reset because of the unclocked hardware access. Fixes: 9b3febc3a3da ("iommu/arm-smmu: Convert to domain_alloc_paging()") Signed-off-by: Dmitry Baryshkov Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20240213-iommu-revert-domain-alloc-v1-1-325ff55dece4@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 68b6bc5e7c710..6317aaf7b3ab1 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -859,10 +859,14 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) arm_smmu_rpm_put(smmu); } -static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) +static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; + if (type != IOMMU_DOMAIN_UNMANAGED) { + if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) + return NULL; + } /* * Allocate the domain and initialise some of its data structures. * We can't really do anything meaningful until we've added a @@ -875,15 +879,6 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); - if (dev) { - struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); - - if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) { - kfree(smmu_domain); - return NULL; - } - } - return &smmu_domain->domain; } @@ -1600,7 +1595,7 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc_paging = arm_smmu_domain_alloc_paging, + .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize, -- GitLab From e083dd032eeba9e28e4703cd5aaf4a409ebc3837 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 10 Feb 2024 22:11:52 -0800 Subject: [PATCH 1014/1405] net: ti: icssg-prueth: add dependency for PTP When CONFIG_PTP_1588_CLOCK=m and CONFIG_TI_ICSSG_PRUETH=y, there are kconfig dependency warnings and build errors referencing PTP functions. Fix these by making TI_ICSSG_PRUETH depend on PTP_1588_CLOCK_OPTIONAL. Fixes these build errors and warnings: WARNING: unmet direct dependencies detected for TI_ICSS_IEP Depends on [m]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_TI [=y] && PTP_1588_CLOCK_OPTIONAL [=m] && TI_PRUSS [=y] Selected by [y]: - TI_ICSSG_PRUETH [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_TI [=y] && PRU_REMOTEPROC [=y] && ARCH_K3 [=y] && OF [=y] && TI_K3_UDMA_GLUE_LAYER [=y] aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_get_ptp_clock_idx': icss_iep.c:(.text+0x1d4): undefined reference to `ptp_clock_index' aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_exit': icss_iep.c:(.text+0xde8): undefined reference to `ptp_clock_unregister' aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_init': icss_iep.c:(.text+0x176c): undefined reference to `ptp_clock_register' Fixes: 186734c15886 ("net: ti: icssg-prueth: add packet timestamping and ptp support") Signed-off-by: Randy Dunlap Cc: Roger Quadros Cc: Md Danish Anwar Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Reviewed-by: MD Danish Anwar Link: https://lore.kernel.org/r/20240211061152.14696-1-rdunlap@infradead.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index be01450c20dc0..1530d13984d42 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -189,6 +189,7 @@ config TI_ICSSG_PRUETH select TI_K3_CPPI_DESC_POOL depends on PRU_REMOTEPROC depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER + depends on PTP_1588_CLOCK_OPTIONAL help Support dual Gigabit Ethernet ports over the ICSSG PRU Subsystem. This subsystem is available starting with the AM65 platform. -- GitLab From d463bcd7eb341b5b6e6d9263a3bce9f405c2af98 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Feb 2024 10:04:17 +0200 Subject: [PATCH 1015/1405] ASoC: SOF: Intel: pci-tgl: Change the default paths and firmware names The currently used paths and firmware name reflects the reference firmware convention: default_fw_path: intel/avs/{platform_name} default_lib_path: intel/avs-lib/{platform_name} default_tplg_path: intel/avs-tplg default_fw_filename: dsp_basefw.bin The SOF supports building the firmware for cAVS2.5 platforms using IPC4 and it is the preferred IPC4 implementation to be used on these devices. Change the paths and firmware names to reflect this: default_fw_path: intel/sof-ipc4/{platform_name} default_lib_path: intel/sof-ipc4-lib/{platform_name} default_tplg_path: intel/sof-ipc4-tplg default_fw_filename: sof-{platform_name}.ri Signed-off-by: Peter Ujfalusi Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240213080418.21256-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 64 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index 0660d4b2ac96b..a361ee9d1107f 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -33,18 +33,18 @@ static const struct sof_dev_desc tgl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/tgl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-tgl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-tgl.ri", }, .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -66,18 +66,18 @@ static const struct sof_dev_desc tglh_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/tgl-h", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl-h", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl-h", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl-h", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-tgl-h.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-tgl-h.ri", }, .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -98,18 +98,18 @@ static const struct sof_dev_desc ehl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/ehl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/ehl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/ehl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/ehl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-ehl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-ehl.ri", }, .nocodec_tplg_filename = "sof-ehl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -131,18 +131,18 @@ static const struct sof_dev_desc adls_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-s", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-s", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl-s.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl-s.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -164,18 +164,18 @@ static const struct sof_dev_desc adl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -197,18 +197,18 @@ static const struct sof_dev_desc adl_n_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl-n", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-n", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-n", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-n", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl-n.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl-n.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -230,18 +230,18 @@ static const struct sof_dev_desc rpls_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/rpl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl-s", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl-s", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-rpl-s.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-rpl-s.ri", }, .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -263,18 +263,18 @@ static const struct sof_dev_desc rpl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/rpl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-rpl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-rpl.ri", }, .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, -- GitLab From b029482011bffd57319507c2bf4c5a7e06eca756 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Feb 2024 10:04:18 +0200 Subject: [PATCH 1016/1405] ASoC: SOF: Intel: pci-lnl: Change the topology path to intel/sof-ipc4-tplg The firmware release which going to introduce support for Lunar Lake will use the documented default topology directory for IPC4: intel/sof-ipc4-tplg Change the default path accordingly before sof-bin (sof-firmware) release includes Lunar Lake firmware and topologies. Link: https://github.com/thesofproject/sof-docs/blob/master/getting_started/intel_debug/introduction.rst#2-topology-file Signed-off-by: Peter Ujfalusi Reviewed-by: Mengdong Lin Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Chao Song Link: https://msgid.link/r/20240213080418.21256-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-lnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c index 78a57eb9cbc37..b26ffe767fab5 100644 --- a/sound/soc/sof/intel/pci-lnl.c +++ b/sound/soc/sof/intel/pci-lnl.c @@ -36,7 +36,7 @@ static const struct sof_dev_desc lnl_desc = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl", }, .default_tplg_path = { - [SOF_IPC_TYPE_4] = "intel/sof-ace-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_4] = "sof-lnl.ri", -- GitLab From 5b5089e2a1e753ffe9ee2bf101a9e06784ec5e1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:10:46 +0100 Subject: [PATCH 1017/1405] ASoC: q6dsp: fix event handler prototype clang-16 points out a mismatch in function types that was hidden by a typecast: sound/soc/qcom/qdsp6/q6apm-dai.c:355:38: error: cast from 'void (*)(uint32_t, uint32_t, uint32_t *, void *)' (aka 'void (*)(unsigned int, unsigned int, unsigned int *, void *)') to 'q6apm_cb' (aka 'void (*)(unsigned int, unsigned int, void *, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 355 | prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id); sound/soc/qcom/qdsp6/q6apm-dai.c:499:38: error: cast from 'void (*)(uint32_t, uint32_t, uint32_t *, void *)' (aka 'void (*)(unsigned int, unsigned int, unsigned int *, void *)') to 'q6apm_cb' (aka 'void (*)(unsigned int, unsigned int, void *, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 499 | prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id); The only difference here is the 'payload' argument, which is not even used in this function, so just fix its type and remove the cast. Fixes: 88b60bf047fd ("ASoC: q6dsp: q6apm-dai: Add open/free compress DAI callbacks") Signed-off-by: Arnd Bergmann Link: https://msgid.link/r/20240213101105.459402-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6apm-dai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index 052e40cb38fec..00bbd291be5ce 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -123,7 +123,7 @@ static struct snd_pcm_hardware q6apm_dai_hardware_playback = { .fifo_size = 0, }; -static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, void *priv) +static void event_handler(uint32_t opcode, uint32_t token, void *payload, void *priv) { struct q6apm_dai_rtd *prtd = priv; struct snd_pcm_substream *substream = prtd->substream; @@ -157,7 +157,7 @@ static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, vo } static void event_handler_compr(uint32_t opcode, uint32_t token, - uint32_t *payload, void *priv) + void *payload, void *priv) { struct q6apm_dai_rtd *prtd = priv; struct snd_compr_stream *substream = prtd->cstream; @@ -352,7 +352,7 @@ static int q6apm_dai_open(struct snd_soc_component *component, spin_lock_init(&prtd->lock); prtd->substream = substream; - prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id); + prtd->graph = q6apm_graph_open(dev, event_handler, prtd, graph_id); if (IS_ERR(prtd->graph)) { dev_err(dev, "%s: Could not allocate memory\n", __func__); ret = PTR_ERR(prtd->graph); @@ -496,7 +496,7 @@ static int q6apm_dai_compr_open(struct snd_soc_component *component, return -ENOMEM; prtd->cstream = stream; - prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id); + prtd->graph = q6apm_graph_open(dev, event_handler_compr, prtd, graph_id); if (IS_ERR(prtd->graph)) { ret = PTR_ERR(prtd->graph); kfree(prtd); -- GitLab From c40aad7c81e5fba34b70123ed7ce3397fa62a4d2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 13 Feb 2024 13:52:33 +0200 Subject: [PATCH 1018/1405] ASoC: SOF: ipc4-pcm: Workaround for crashed firmware on system suspend When the system is suspended while audio is active, the sof_ipc4_pcm_hw_free() is invoked to reset the pipelines since during suspend the DSP is turned off, streams will be re-started after resume. If the firmware crashes during while audio is running (or when we reset the stream before suspend) then the sof_ipc4_set_multi_pipeline_state() will fail with IPC error and the state change is interrupted. This will cause misalignment between the kernel and firmware state on next DSP boot resulting errors returned by firmware for IPC messages, eventually failing the audio resume. On stream close the errors are ignored so the kernel state will be corrected on the next DSP boot, so the second boot after the DSP panic. If sof_ipc4_trigger_pipelines() is called from sof_ipc4_pcm_hw_free() then state parameter is SOF_IPC4_PIPE_RESET and only in this case. Treat a forced pipeline reset similarly to how we treat a pcm_free by ignoring error on state sending to allow the kernel's state to be consistent with the state the firmware will have after the next boot. Link: https://github.com/thesofproject/sof/issues/8721 Signed-off-by: Peter Ujfalusi Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Link: https://msgid.link/r/20240213115233.15716-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-pcm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 85d3f390e4b29..07eb5c6d4adf3 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -413,7 +413,18 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component, ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list); if (ret < 0) { dev_err(sdev->dev, "failed to set final state %d for all pipelines\n", state); - goto free; + /* + * workaround: if the firmware is crashed while setting the + * pipelines to reset state we must ignore the error code and + * reset it to 0. + * Since the firmware is crashed we will not send IPC messages + * and we are going to see errors printed, but the state of the + * widgets will be correct for the next boot. + */ + if (sdev->fw_state != SOF_FW_CRASHED || state != SOF_IPC4_PIPE_RESET) + goto free; + + ret = 0; } /* update RUNNING/RESET state for all pipelines that were just triggered */ -- GitLab From fcbe4873089c84da641df75cda9cac2e9addbb4b Mon Sep 17 00:00:00 2001 From: Curtis Malainey Date: Tue, 13 Feb 2024 14:38:34 +0200 Subject: [PATCH 1019/1405] ASoC: SOF: IPC3: fix message bounds on ipc ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") introduced a new allocation before the upper bounds check in do_rx_work. As a result A DSP can cause bad allocations if spewing garbage. Fixes: 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") Reported-by: Tim Van Patten Cc: stable@vger.kernel.org Signed-off-by: Curtis Malainey Reviewed-by: Péter Ujfalusi Reviewed-by: Daniel Baluta Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://msgid.link/r/20240213123834.4827-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c index fb40378ad0840..c03dd513fbff1 100644 --- a/sound/soc/sof/ipc3.c +++ b/sound/soc/sof/ipc3.c @@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev) return; } - if (hdr.size < sizeof(hdr)) { + if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) { dev_err(sdev->dev, "The received message size is invalid\n"); return; } -- GitLab From 24b6332c2d4ff08fb7601ac8f751a5ba51e0ebd3 Mon Sep 17 00:00:00 2001 From: Tomasz Kudela Date: Tue, 13 Feb 2024 12:56:14 +0100 Subject: [PATCH 1020/1405] ALSA: hda: Add Lenovo Legion 7i gen7 sound quirk Add sound support for the Legion 7i gen7 laptop (16IAX7). Signed-off-by: Tomasz Kudela Link: https://lore.kernel.org/r/20240213115614.10420-1-ramzes005@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 3 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index d74cf11eef1ea..57b21285ab6a6 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -95,6 +95,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, { "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, { "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, @@ -431,6 +432,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431F12", generic_dsd_config }, { "CSC3551", "10431F1F", generic_dsd_config }, { "CSC3551", "10431F62", generic_dsd_config }, + { "CSC3551", "17AA386F", generic_dsd_config }, { "CSC3551", "17AA38B4", generic_dsd_config }, { "CSC3551", "17AA38B5", generic_dsd_config }, { "CSC3551", "17AA38B6", generic_dsd_config }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e045d3e76a45e..98886b803bc55 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10262,6 +10262,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), + SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), -- GitLab From b671cd3d456315f63171a670769356a196cf7fd0 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 21 Aug 2023 16:02:01 -0400 Subject: [PATCH 1021/1405] drm/prime: Support page array >= 4GB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without unsigned long typecast, the size is passed in as zero if page array size >= 4GB, nr_pages >= 0x100000, then sg list converted will have the first and the last chunk lost. Signed-off-by: Philip Yang Acked-by: Felix Kuehling Reviewed-by: Christian König CC: stable@vger.kernel.org Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230821200201.24685-1-Philip.Yang@amd.com --- drivers/gpu/drm/drm_prime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 834a5e28abbe5..7352bde299d54 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); -- GitLab From 269e31aecdd0b70f53a05def79480f15cbcc0fd6 Mon Sep 17 00:00:00 2001 From: Ralf Schlatterbeck Date: Fri, 2 Feb 2024 12:53:30 +0100 Subject: [PATCH 1022/1405] spi-mxs: Fix chipselect glitch There was a change in the mxs-dma engine that uses a new custom flag. The change was not applied to the mxs spi driver. This results in chipselect being deasserted too early. This fixes the chipselect problem by using the new flag in the mxs-spi driver. Fixes: ceeeb99cd821 ("dmaengine: mxs: rename custom flag") Signed-off-by: Ralf Schlatterbeck Link: https://msgid.link/r/20240202115330.wxkbfmvd76sy3a6a@runtux.com Signed-off-by: Mark Brown --- drivers/spi/spi-mxs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c index 1bf080339b5a7..88cbe4f00cc3b 100644 --- a/drivers/spi/spi-mxs.c +++ b/drivers/spi/spi-mxs.c @@ -39,6 +39,7 @@ #include #include #include +#include #define DRIVER_NAME "mxs-spi" @@ -252,7 +253,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, desc = dmaengine_prep_slave_sg(ssp->dmach, &dma_xfer[sg_count].sg, 1, (flags & TXRX_WRITE) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END); if (!desc) { dev_err(ssp->dev, -- GitLab From b6802b61a9d0e99dcfa6fff7c50db7c48a9623d3 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 12 Feb 2024 13:55:34 -0800 Subject: [PATCH 1023/1405] drm/crtc: fix uninitialized variable use even harder DRM_MODESET_LOCK_ALL_BEGIN() has a hidden trap-door (aka retry loop), which means we can't rely too much on variable initializers. Fixes: 6e455f5dcdd1 ("drm/crtc: fix uninitialized variable use") Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov # sc7180, sdm845 Link: https://patchwork.freedesktop.org/patch/msgid/20240212215534.190682-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/drm_crtc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cb90e70d85e86..65f9f66933bba 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -904,6 +904,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, connector_set = NULL; fb = NULL; mode = NULL; + num_connectors = 0; DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); -- GitLab From 79bd7eab8366b29eaa099908d5694cb473684b9d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 13 Feb 2024 00:26:46 -0800 Subject: [PATCH 1024/1405] nvme-fabrics: fix I/O connect error handling In nvmf_connect_io_queue(), if connect I/O command fails, we log the error and continue for authentication. This overrides error captured from __nvme_submit_sync_cmd(), causing wrong return value. Add goto out_free_data after logging connect error to fix the issue. Fixes: f50fff73d620c ("nvme: implement In-Band authentication") Signed-off-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 3499acbf6a822..495c171daead1 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -534,6 +534,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); + goto out_free_data; } result = le32_to_cpu(res.u32); if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { -- GitLab From 4e06ec0774f5bebf10e27bc7a5ace4b48ae0fa56 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 24 Jan 2024 13:07:33 -0600 Subject: [PATCH 1025/1405] dt-bindings: ufs: samsung,exynos-ufs: Add size constraints on "samsung,sysreg" The 'phandle-array' type is a bit ambiguous. It can be either just an array of phandles or an array of phandles plus args. "samsung,sysreg" is the latter and needs to be constrained to a single entry with a phandle and offset. Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240124190733.1554314-1-robh@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/ufs/samsung,exynos-ufs.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml index 88cc1e3a0c887..b2b509b3944d8 100644 --- a/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml +++ b/Documentation/devicetree/bindings/ufs/samsung,exynos-ufs.yaml @@ -55,9 +55,12 @@ properties: samsung,sysreg: $ref: /schemas/types.yaml#/definitions/phandle-array - description: Should be phandle/offset pair. The phandle to the syscon node - which indicates the FSYSx sysreg interface and the offset of - the control register for UFS io coherency setting. + items: + - items: + - description: phandle to FSYSx sysreg node + - description: offset of the control register for UFS io coherency setting + description: + Phandle and offset to the FSYSx sysreg for UFS io coherency setting. dma-coherent: true -- GitLab From f03606470886e781e68b5d7acf2afb23b86cd7fa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 13 Feb 2024 15:46:38 +0100 Subject: [PATCH 1026/1405] riscv: dts: starfive: replace underscores in node names Underscores should not be used in node names (dtc with W=2 warns about them), so replace them with hyphens. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7100.dtsi | 12 ++++++------ arch/riscv/boot/dts/starfive/jh7110.dtsi | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi index c216aaecac53f..8bcf36d07f3f7 100644 --- a/arch/riscv/boot/dts/starfive/jh7100.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -96,14 +96,14 @@ thermal-sensors = <&sfctemp>; trips { - cpu_alert0 { + cpu-alert0 { /* milliCelsius */ temperature = <75000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit { + cpu-crit { /* milliCelsius */ temperature = <90000>; hysteresis = <2000>; @@ -113,28 +113,28 @@ }; }; - osc_sys: osc_sys { + osc_sys: osc-sys { compatible = "fixed-clock"; #clock-cells = <0>; /* This value must be overridden by the board */ clock-frequency = <0>; }; - osc_aud: osc_aud { + osc_aud: osc-aud { compatible = "fixed-clock"; #clock-cells = <0>; /* This value must be overridden by the board */ clock-frequency = <0>; }; - gmac_rmii_ref: gmac_rmii_ref { + gmac_rmii_ref: gmac-rmii-ref { compatible = "fixed-clock"; #clock-cells = <0>; /* Should be overridden by the board when needed */ clock-frequency = <0>; }; - gmac_gr_mii_rxclk: gmac_gr_mii_rxclk { + gmac_gr_mii_rxclk: gmac-gr-mii-rxclk { compatible = "fixed-clock"; #clock-cells = <0>; /* Should be overridden by the board when needed */ diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi index 45213cdf50dc7..74ed3b9264d8f 100644 --- a/arch/riscv/boot/dts/starfive/jh7110.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi @@ -237,14 +237,14 @@ }; trips { - cpu_alert0: cpu_alert0 { + cpu_alert0: cpu-alert0 { /* milliCelsius */ temperature = <85000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit { + cpu-crit { /* milliCelsius */ temperature = <100000>; hysteresis = <2000>; -- GitLab From 29f6975332479f92233594901c649ff4d71f8cb6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 5 Feb 2024 11:10:25 -0800 Subject: [PATCH 1027/1405] nvme: implement support for relaxed effects NVM Express TP4167 provides a way for controllers to report a relaxed execution constraint. Specifically, it notifies of exclusivity for IO vs. admin commands instead of grouping these together. If set, then we don't need to freeze IO in order to execute that admin command. The freezing distrupts IO processes, so it's nice to avoid that if the controller tells us it's not necessary. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 ++++ include/linux/nvme.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 60537c9224bf9..0a96362912ced 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1153,6 +1153,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { effects = le32_to_cpu(ctrl->effects->acs[opcode]); + + /* Ignore execution restrictions if any relaxation bits are set */ + if (effects & NVME_CMD_EFFECTS_CSER_MASK) + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } return effects; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc605ec4a3fd0..3ef4053ea9500 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -646,6 +646,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), -- GitLab From bdbddb109c75365d22ec4826f480c5e75869e1cb Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Tue, 13 Feb 2024 14:24:34 +0100 Subject: [PATCH 1028/1405] tracing: Fix HAVE_DYNAMIC_FTRACE_WITH_REGS ifdef Commit a8b9cf62ade1 ("ftrace: Fix DIRECT_CALLS to use SAVE_REGS by default") attempted to fix an issue with direct trampolines on x86, see its description for details. However, it wrongly referenced the HAVE_DYNAMIC_FTRACE_WITH_REGS config option and the problem is still present. Add the missing "CONFIG_" prefix for the logic to work as intended. Link: https://lore.kernel.org/linux-trace-kernel/20240213132434.22537-1-petr.pavlu@suse.com Fixes: a8b9cf62ade1 ("ftrace: Fix DIRECT_CALLS to use SAVE_REGS by default") Signed-off-by: Petr Pavlu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c060d5b479102..83ba342aef31f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5331,7 +5331,7 @@ static int register_ftrace_function_nolock(struct ftrace_ops *ops); * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it * jumps from ftrace_caller for multiple ftrace_ops. */ -#ifndef HAVE_DYNAMIC_FTRACE_WITH_REGS +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) #else #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) -- GitLab From a6eaa24f1cc2c7aecec6047556bdfe32042094c3 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 5 Feb 2024 07:53:40 +0100 Subject: [PATCH 1029/1405] tracing: Use ring_buffer_record_is_set_on() in tracer_tracing_is_on() tracer_tracing_is_on() checks whether record_disabled is not zero. This checks both the record_disabled counter and the RB_BUFFER_OFF flag. Reading the source it looks like this function should only check for the RB_BUFFER_OFF flag. Therefore use ring_buffer_record_is_set_on(). This fixes spurious fails in the 'test for function traceon/off triggers' test from the ftrace testsuite when the system is under load. Link: https://lore.kernel.org/linux-trace-kernel/20240205065340.2848065-1-svens@linux.ibm.com Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Tested-By: Mete Durlu Signed-off-by: Sven Schnelle Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ff8a439d6746..aa54810e8b569 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1532,7 +1532,7 @@ void disable_trace_on_warning(void) bool tracer_tracing_is_on(struct trace_array *tr) { if (tr->array_buffer.buffer) - return ring_buffer_record_is_on(tr->array_buffer.buffer); + return ring_buffer_record_is_set_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } -- GitLab From feefe1f49d26bad9d8997096e3a200280fa7b1c5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 31 Jan 2024 17:18:04 +0000 Subject: [PATCH 1030/1405] btrfs: don't reserve space for checksums when writing to nocow files Currently when doing a write to a file we always reserve metadata space for inserting data checksums. However we don't need to do it if we have a nodatacow file (-o nodatacow mount option or chattr +C) or if checksums are disabled (-o nodatasum mount option), as in that case we are only adding unnecessary pressure to metadata reservations. For example on x86_64, with the default node size of 16K, a 4K buffered write into a nodatacow file is reserving 655360 bytes of metadata space, as it's accounting for checksums. After this change, which stops reserving space for checksums if we have a nodatacow file or checksums are disabled, we only need to reserve 393216 bytes of metadata. CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delalloc-space.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 2833e8ef4c098..acf9f4b6c0440 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -245,7 +245,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv = &inode->block_rsv; u64 reserve_size = 0; u64 qgroup_rsv_size = 0; - u64 csum_leaves; unsigned outstanding_extents; lockdep_assert_held(&inode->lock); @@ -260,10 +259,12 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, outstanding_extents); reserve_size += btrfs_calc_metadata_size(fs_info, 1); } - csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, - inode->csum_bytes); - reserve_size += btrfs_calc_insert_metadata_size(fs_info, - csum_leaves); + if (!(inode->flags & BTRFS_INODE_NODATASUM)) { + u64 csum_leaves; + + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); + } /* * For qgroup rsv, the calculation is very simple: * account one nodesize for each outstanding extent @@ -278,14 +279,20 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); } -static void calc_inode_reservations(struct btrfs_fs_info *fs_info, +static void calc_inode_reservations(struct btrfs_inode *inode, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { + struct btrfs_fs_info *fs_info = inode->root->fs_info; u64 nr_extents = count_max_extents(fs_info, num_bytes); - u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + u64 csum_leaves; u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + if (inode->flags & BTRFS_INODE_NODATASUM) + csum_leaves = 0; + else + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); + *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, nr_extents + csum_leaves); @@ -337,7 +344,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * everything out and try again, which is bad. This way we just * over-reserve slightly, and clean up the mess when we are done. */ - calc_inode_reservations(fs_info, num_bytes, disk_num_bytes, + calc_inode_reservations(inode, num_bytes, disk_num_bytes, &meta_reserve, &qgroup_reserve); ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, noflush); @@ -359,7 +366,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, nr_extents = count_max_extents(fs_info, num_bytes); spin_lock(&inode->lock); btrfs_mod_outstanding_extents(inode, nr_extents); - inode->csum_bytes += disk_num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); @@ -393,7 +401,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - inode->csum_bytes -= num_bytes; + if (!(inode->flags & BTRFS_INODE_NODATASUM)) + inode->csum_bytes -= num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); -- GitLab From 1bd96c92c6a0a4d43815eb685c15aa4b78879dc9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Feb 2024 12:09:22 +0000 Subject: [PATCH 1031/1405] btrfs: reject encoded write if inode has nodatasum flag set Currently we allow an encoded write against inodes that have the NODATASUM flag set, either because they are NOCOW files or they were created while the filesystem was mounted with "-o nodatasum". This results in having compressed extents without corresponding checksums, which is a filesystem inconsistency reported by 'btrfs check'. For example, running btrfs/281 with MOUNT_OPTIONS="-o nodatacow" triggers this and 'btrfs check' errors out with: [1/7] checking root items [2/7] checking extents [3/7] checking free space tree [4/7] checking fs roots root 256 inode 257 errors 1040, bad file extent, some csum missing root 256 inode 258 errors 1040, bad file extent, some csum missing ERROR: errors found in fs roots (...) So reject encoded writes if the target inode has NODATASUM set. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d232eca1bbee2..5f39462dceb0b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10288,6 +10288,13 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE) return -EINVAL; + /* + * Compressed extents should always have checksums, so error out if we + * have a NOCOW file or inode was created while mounted with NODATASUM. + */ + if (inode->flags & BTRFS_INODE_NODATASUM) + return -EINVAL; + orig_count = iov_iter_count(from); /* The extent size must be sane. */ -- GitLab From 88e81a67773017a2b93f6d5fe7c78bb0c5a6e4dd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 12 Feb 2024 21:50:53 +0000 Subject: [PATCH 1032/1405] btrfs: zoned: fix chunk map leak when loading block group zone info At btrfs_load_block_group_zone_info() we never drop a reference on the chunk map we have looked up, therefore leaking a reference on it. So add the missing btrfs_free_chunk_map() at the end of the function. Fixes: 7dc66abb5a47 ("btrfs: use a dedicated data structure for chunk maps") Reported-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index ac9bbe0c4ffe6..afeb1dc1f43ae 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1687,6 +1687,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } bitmap_free(active); kfree(zone_info); + btrfs_free_chunk_map(map); return ret; } -- GitLab From 2f6397e448e689adf57e6788c90f913abd7e1af8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Feb 2024 14:32:17 +0000 Subject: [PATCH 1033/1405] btrfs: don't refill whole delayed refs block reserve when starting transaction Since commit 28270e25c69a ("btrfs: always reserve space for delayed refs when starting transaction") we started not only to reserve metadata space for the delayed refs a caller of btrfs_start_transaction() might generate but also to try to fully refill the delayed refs block reserve, because there are several case where we generate delayed refs and haven't reserved space for them, relying on the global block reserve. Relying too much on the global block reserve is not always safe, and can result in hitting -ENOSPC during transaction commits or worst, in rare cases, being unable to mount a filesystem that needs to do orphan cleanup or anything that requires modifying the filesystem during mount, and has no more unallocated space and the metadata space is nearly full. This was explained in detail in that commit's change log. However the gap between the reserved amount and the size of the delayed refs block reserve can be huge, so attempting to reserve space for such a gap can result in allocating many metadata block groups that end up not being used. After a recent patch, with the subject: "btrfs: add new unused block groups to the list of unused block groups" We started to add new block groups that are unused to the list of unused block groups, to avoid having them around for a very long time in case they are never used, because a block group is only added to the list of unused block groups when we deallocate the last extent or when mounting the filesystem and the block group has 0 bytes used. This is not a problem introduced by the commit mentioned earlier, it always existed as our metadata space reservations are, most of the time, pessimistic and end up not using all the space they reserved, so we can occasionally end up with one or two unused metadata block groups for a long period. However after that commit mentioned earlier, we are just more pessimistic in the metadata space reservations when starting a transaction and therefore the issue is more likely to happen. This however is not always enough because we might create unused metadata block groups when reserving metadata space at a high rate if there's always a gap in the delayed refs block reserve and the cleaner kthread isn't triggered often enough or is busy with other work (running delayed iputs, cleaning deleted roots, etc), not to mention the block group's allocated space is only usable for a new block group after the transaction used to remove it is committed. A user reported that he's getting a lot of allocated metadata block groups but the usage percentage of metadata space was very low compared to the total allocated space, specially after running a series of block group relocations. So for now stop trying to refill the gap in the delayed refs block reserve and reserve space only for the delayed refs we are expected to generate when starting a transaction. CC: stable@vger.kernel.org # 6.7+ Reported-by: Ivan Shapovalov Link: https://lore.kernel.org/linux-btrfs/9cdbf0ca9cdda1b4c84e15e548af7d7f9f926382.camel@intelfx.name/ Link: https://lore.kernel.org/linux-btrfs/CAL3q7H6802ayLHUJFztzZAVzBLJAGdFx=6FHNNy87+obZXXZpQ@mail.gmail.com/ Tested-by: Ivan Shapovalov Reported-by: Heddxh Link: https://lore.kernel.org/linux-btrfs/CAE93xANEby6RezOD=zcofENYZOT-wpYygJyauyUAZkLv6XVFOA@mail.gmail.com/ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 5b3333ceef048..c52807d97efa5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -564,56 +564,22 @@ static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 *delayed_refs_bytes) { - struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info; - u64 extra_delayed_refs_bytes = 0; - u64 bytes; + u64 bytes = num_bytes + *delayed_refs_bytes; int ret; - /* - * If there's a gap between the size of the delayed refs reserve and - * its reserved space, than some tasks have added delayed refs or bumped - * its size otherwise (due to block group creation or removal, or block - * group item update). Also try to allocate that gap in order to prevent - * using (and possibly abusing) the global reserve when committing the - * transaction. - */ - if (flush == BTRFS_RESERVE_FLUSH_ALL && - !btrfs_block_rsv_full(delayed_refs_rsv)) { - spin_lock(&delayed_refs_rsv->lock); - if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) - extra_delayed_refs_bytes = delayed_refs_rsv->size - - delayed_refs_rsv->reserved; - spin_unlock(&delayed_refs_rsv->lock); - } - - bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes; - /* * We want to reserve all the bytes we may need all at once, so we only * do 1 enospc flushing cycle per transaction start. */ ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); - if (ret == 0) { - if (extra_delayed_refs_bytes > 0) - btrfs_migrate_to_delayed_refs_rsv(fs_info, - extra_delayed_refs_bytes); - return 0; - } - - if (extra_delayed_refs_bytes > 0) { - bytes -= extra_delayed_refs_bytes; - ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); - if (ret == 0) - return 0; - } /* * If we are an emergency flush, which can steal from the global block * reserve, then attempt to not reserve space for the delayed refs, as * we will consume space for them from the global block reserve. */ - if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { + if (ret && flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) { bytes -= *delayed_refs_bytes; *delayed_refs_bytes = 0; ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush); -- GitLab From e58779f47e5eeb4fc9e3707951b81fbe31de5e3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2024 10:39:41 +0100 Subject: [PATCH 1034/1405] selftests: net: cope with slow env in gro.sh test The gro self-tests sends the packets to be aggregated with multiple write operations. When running is slow environment, it's hard to guarantee that the GRO engine will wait for the last packet in an intended train. The above causes almost deterministic failures in our CI for the 'large' test-case. Address the issue explicitly ignoring failures for such case in slow environments (KSFT_MACHINE_SLOW==true). Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/97d3ba83f5a2bfeb36f6bc0fb76724eb3dafb608.1707729403.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 19352f106c1df..02c21ff4ca81f 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -31,6 +31,11 @@ run_test() { 1>>log.txt wait "${server_pid}" exit_code=$? + if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ + ${exit_code} -ne 0 ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + exit_code=0 + fi if [[ "${exit_code}" -eq 0 ]]; then break; fi -- GitLab From a7ee79b9c4553498c78552d12321d85b645f02ca Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2024 10:43:31 +0100 Subject: [PATCH 1035/1405] selftests: net: cope with slow env in so_txtime.sh test The mentioned test is failing in slow environments: # SO_TXTIME ipv4 clock monotonic # ./so_txtime: recv: timeout: Resource temporarily unavailable not ok 1 selftests: net: so_txtime.sh # exit=1 Tuning the tolerance in the test binary is error-prone and doomed to failures is slow-enough environment. Just resort to suppress any error in such cases. Note to suppress them we need first to refactor a bit the code moving it to explicit error handling. Fixes: af5136f95045 ("selftests/net: SO_TXTIME with ETF and FQ") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/2142d9ed4b5c5aa07dd1b455779625d91b175373.1707730902.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/so_txtime.sh | 29 ++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 3f06f4d286a98..5e861ad32a42e 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -5,6 +5,7 @@ set -e +readonly ksft_skip=4 readonly DEV="veth0" readonly BIN="./so_txtime" @@ -46,7 +47,7 @@ ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad -do_test() { +run_test() { local readonly IP="$1" local readonly CLOCK="$2" local readonly TXARGS="$3" @@ -64,12 +65,25 @@ do_test() { fi local readonly START="$(date +%s%N --date="+ 0.1 seconds")" + ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" wait "$!" } +do_test() { + run_test $@ + [ $? -ne 0 ] && ret=1 +} + +do_fail_test() { + run_test $@ + [ $? -eq 0 ] && ret=1 +} + ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq +set +e +ret=0 do_test 4 mono a,-1 a,-1 do_test 6 mono a,0 a,0 do_test 6 mono a,10 a,10 @@ -77,13 +91,20 @@ do_test 4 mono a,10,b,20 a,10,b,20 do_test 6 mono a,20,b,10 b,20,a,20 if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then - ! do_test 4 tai a,-1 a,-1 - ! do_test 6 tai a,0 a,0 + do_fail_test 4 tai a,-1 a,-1 + do_fail_test 6 tai a,0 a,0 do_test 6 tai a,10 a,10 do_test 4 tai a,10,b,20 a,10,b,20 do_test 6 tai a,20,b,10 b,10,a,20 else echo "tc ($(tc -V)) does not support qdisc etf. skipping" + [ $ret -eq 0 ] && ret=$ksft_skip fi -echo OK. All tests passed +if [ $ret -eq 0 ]; then + echo OK. All tests passed +elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + ret=0 +fi +exit $ret -- GitLab From a71d0908e32f3dd41e355d83eeadd44d94811fd6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2024 11:19:23 +0100 Subject: [PATCH 1036/1405] selftests: net: more strict check in net_helper The helper waiting for a listener port can match any socket whose hexadecimal representation of source or destination addresses matches that of the given port. Additionally, any socket state is accepted. All the above can let the helper return successfully before the relevant listener is actually ready, with unexpected results. So far I could not find any related failure in the netdev CI, but the next patch is going to make the critical event more easily reproducible. Address the issue matching the port hex only vs the relevant socket field and additionally checking the socket state for TCP sockets. Fixes: 3bdd9fd29cb0 ("selftests/net: synchronize udpgro tests' tx and rx connection") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/192b3dbc443d953be32991d1b0ca432bd4c65008.1707731086.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/net_helper.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh index 4fe0befa13fbc..6596fe03c77f4 100644 --- a/tools/testing/selftests/net/net_helper.sh +++ b/tools/testing/selftests/net/net_helper.sh @@ -8,13 +8,16 @@ wait_local_port_listen() local listener_ns="${1}" local port="${2}" local protocol="${3}" - local port_hex + local pattern local i - port_hex="$(printf "%04X" "${port}")" + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" for i in $(seq 10); do - if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \ - grep -q "${port_hex}"; then + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then break fi sleep 0.1 -- GitLab From 20622dc934e178ef11fad396eb272597f21bffe2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2024 11:19:24 +0100 Subject: [PATCH 1037/1405] selftests: net: more pmtu.sh fixes The netdev CI is reporting failures for the pmtu test: [ 115.929264] br0: port 2(vxlan_a) entered forwarding state # 2024/02/08 17:33:22 socat[7871] E bind(7, {AF=10 [0000:0000:0000:0000:0000:0000:0000:0000]:50000}, 28): Address already in use # 2024/02/08 17:33:22 socat[7877] E write(7, 0x5598fb6ff000, 8192): Connection refused # TEST: IPv6, bridged vxlan4: PMTU exceptions [FAIL] # File size 0 mismatches exepcted value in locally bridged vxlan test The root cause is apparently a socket created by a previous iteration of the relevant loop still lasting in LAST_ACK state. Note that even the file size check is racy, the receiver process dumping the file could still be running in background Allow the listener to bound on the same local port via SO_REUSEADDR and collect file output file size only after the listener completion. Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/4f51c11a1ce7ca7a4dabd926cffff63dadac9ba1.1707731086.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index d65fdd407d73f..cfc84958025a6 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1336,16 +1336,16 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { else TCPDST="TCP:[${dst}]:50000" fi - ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile & + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile & local socat_pid=$! wait_local_port_listen ${NS_B} 50000 tcp dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + wait ${socat_pid} size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - wait ${socat_pid} [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 done -- GitLab From 00890f5d15f50bd386bf222ddee355cec6c6d53a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 8 Feb 2024 11:51:29 +0100 Subject: [PATCH 1038/1405] arm64: dts: rockchip: minor rk3588 whitespace cleanup The DTS code coding style expects exactly one space before '{' character. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240208105129.128561-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts index d7722772ecd8a..997b516c2533c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-nanopc-t6.dts @@ -189,19 +189,19 @@ cpu-supply = <&vdd_cpu_lit_s0>; }; -&cpu_b0{ +&cpu_b0 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b1{ +&cpu_b1 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b2{ +&cpu_b2 { cpu-supply = <&vdd_cpu_big1_s0>; }; -&cpu_b3{ +&cpu_b3 { cpu-supply = <&vdd_cpu_big1_s0>; }; -- GitLab From 334bf0710c98d391f4067b72f535d6c4c84dfb6f Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 19 Jan 2024 11:16:56 +0100 Subject: [PATCH 1039/1405] arm64: dts: rockchip: set num-cs property for spi on px30 The px30 has two spi controllers with two chip-selects each. The num-cs property is specified as the total number of chip selects a controllers has and is used since 2020 to find uses of chipselects outside that range in the Rockchip spi driver. Without the property set, the default is 1, so spi devices using the second chipselect will not be created. Fixes: eb1262e3cc8b ("spi: spi-rockchip: use num-cs property and ctlr->enable_gpiods") Signed-off-by: Heiko Stuebner Reviewed-by: Quentin Schulz Link: https://lore.kernel.org/r/20240119101656.965744-1-heiko@sntech.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index d0905515399bb..9137dd76e72ce 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -631,6 +631,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 12>, <&dmac 13>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi0_clk &spi0_csn &spi0_miso &spi0_mosi>; #address-cells = <1>; @@ -646,6 +647,7 @@ clock-names = "spiclk", "apb_pclk"; dmas = <&dmac 14>, <&dmac 15>; dma-names = "tx", "rx"; + num-cs = <2>; pinctrl-names = "default"; pinctrl-0 = <&spi1_clk &spi1_csn0 &spi1_csn1 &spi1_miso &spi1_mosi>; #address-cells = <1>; -- GitLab From 1bbd894e2ae67faf52632bc9290ff926d9b741ea Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Mon, 5 Feb 2024 00:16:43 +0100 Subject: [PATCH 1040/1405] arm64: dts: rockchip: Drop interrupts property from rk3328 pwm-rockchip node The binding doesn't define interrupts and adding such a definition was refused because it's unclear how they should ever be used and the relevant registers are outside the PWM range. So drop them fixing several dtbs_check warnings. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/5551846d-62cd-4b72-94f4-07541e726c37@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index fb5dcf6e93272..7b4c15c4a9c31 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -488,7 +488,6 @@ pwm3: pwm@ff1b0030 { compatible = "rockchip,rk3328-pwm"; reg = <0x0 0xff1b0030 0x0 0x10>; - interrupts = ; clocks = <&cru SCLK_PWM>, <&cru PCLK_PWM>; clock-names = "pwm", "pclk"; pinctrl-names = "default"; -- GitLab From 11f522256e9043b0fcd2f994278645d3e201d20c Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 8 Feb 2024 15:31:15 +0530 Subject: [PATCH 1041/1405] bpf: Fix warning for bpf_cpumask in verifier Compiling with CONFIG_BPF_SYSCALL & !CONFIG_BPF_JIT throws the below warning: "WARN: resolve_btfids: unresolved symbol bpf_cpumask" Fix it by adding the appropriate #ifdef. Signed-off-by: Hari Bathini Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Stanislav Fomichev Acked-by: David Vernet Link: https://lore.kernel.org/bpf/20240208100115.602172-1-hbathini@linux.ibm.com --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 65f598694d550..b263f093ee761 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc) #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) #endif +#ifdef CONFIG_BPF_JIT BTF_ID(struct, bpf_cpumask) +#endif BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) -- GitLab From c60d847be7b8e69e419e02a2b3d19c2842a3c35d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Feb 2024 19:30:52 +0000 Subject: [PATCH 1042/1405] KVM: arm64: Fix double-free following kvm_pgtable_stage2_free_unlinked() kvm_pgtable_stage2_free_unlinked() does the final put_page() on the root page of the sub-tree before returning, so remove the additional put_page() invocations in the callers. Cc: Ricardo Koller Fixes: f6a27d6dc51b2 ("KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()") Signed-off-by: Will Deacon Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20240212193052.27765-1-will@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c651df904fe3e..ab9d05fcf98b2 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1419,7 +1419,6 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, level + 1); if (ret) { kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level); - mm_ops->put_page(pgtable); return ERR_PTR(ret); } @@ -1502,7 +1501,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_try_break_pte(ctx, mmu)) { kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level); - mm_ops->put_page(childp); return -EAGAIN; } -- GitLab From c22d03a95b0d815cd186302fdd93f74d99f1c914 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 25 Jan 2024 14:19:42 -0600 Subject: [PATCH 1043/1405] arm64: dts: rockchip: Correct Indiedroid Nova GPIO Names Correct the names given to a few of the GPIO pins. The original names were unknowingly based on the header from a pre-production board. The production board has a slightly different pin assignment for the 40-pin GPIO header. Fixes: 3900160e164b ("arm64: dts: rockchip: Add Indiedroid Nova board") Signed-off-by: Chris Morgan Link: https://lore.kernel.org/r/20240125201943.90476-2-macroalpha82@gmail.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3588s-indiedroid-nova.dts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index dc677f29a9c7f..3c22788868519 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -195,13 +195,13 @@ &gpio1 { gpio-line-names = /* GPIO1 A0-A7 */ - "HEADER_27_3v3", "HEADER_28_3v3", "", "", + "HEADER_27_3v3", "", "", "", "HEADER_29_1v8", "", "HEADER_7_1v8", "", /* GPIO1 B0-B7 */ "", "HEADER_31_1v8", "HEADER_33_1v8", "", "HEADER_11_1v8", "HEADER_13_1v8", "", "", /* GPIO1 C0-C7 */ - "", "", "", "", + "", "HEADER_28_3v3", "", "", "", "", "", "", /* GPIO1 D0-D7 */ "", "", "", "", @@ -225,11 +225,11 @@ &gpio4 { gpio-line-names = /* GPIO4 A0-A7 */ - "", "", "HEADER_37_3v3", "HEADER_32_3v3", - "HEADER_36_3v3", "", "HEADER_35_3v3", "HEADER_38_3v3", + "", "", "HEADER_37_3v3", "HEADER_8_3v3", + "HEADER_10_3v3", "", "HEADER_32_3v3", "HEADER_35_3v3", /* GPIO4 B0-B7 */ "", "", "", "HEADER_40_3v3", - "HEADER_8_3v3", "HEADER_10_3v3", "", "", + "HEADER_38_3v3", "HEADER_36_3v3", "", "", /* GPIO4 C0-C7 */ "", "", "", "", "", "", "", "", -- GitLab From b6ddaa63f728d26c12048aed76be99c24f435c41 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 19 Jan 2024 11:08:40 -0800 Subject: [PATCH 1044/1405] drm/rockchip: vop2: add a missing unlock in vop2_crtc_atomic_enable() Unlock before returning on the error path. Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") Signed-off-by: Harshit Mogalapalli Reviewed-by: Sascha Hauer Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20240119190841.1619443-1-harshit.m.mogalapalli@oracle.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 85b3b4871a1d6..fdd768bbd487c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); } - if (!clock) + if (!clock) { + vop2_unlock(vop2); return; + } if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) -- GitLab From 0db0c1770834f39e11a2902e20e1f11a482f4465 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 9 Feb 2024 11:18:40 +0000 Subject: [PATCH 1045/1405] ASoC: cs35l56: Workaround for ACPI with broken spk-id-gpios property The ACPI in some SoundWire laptops has a spk-id-gpios property but it points to the wrong Device node. This patch adds a workaround to try to get the GPIO directly from the correct Device node. If the attempt to get the GPIOs from the property fails, the workaround looks for the SDCA node "AF01", which is where the GpioIo resource is defined. If this exists, a spk-id-gpios mapping is added to that node and then the GPIO is got from that node using the property. Signed-off-by: Richard Fitzgerald Link: https://msgid.link/r/20240209111840.1543630-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 93 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 98d3957c66e78..2c1313e34cce4 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -5,6 +5,7 @@ // Copyright (C) 2023 Cirrus Logic, Inc. and // Cirrus Logic International Semiconductor Ltd. +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1260,6 +1262,94 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56) return 0; } +/* + * Some SoundWire laptops have a spk-id-gpios property but it points to + * the wrong ACPI Device node so can't be used to get the GPIO. Try to + * find the SDCA node containing the GpioIo resource and add a GPIO + * mapping to it. + */ +static const struct acpi_gpio_params cs35l56_af01_first_gpio = { 0, 0, false }; +static const struct acpi_gpio_mapping cs35l56_af01_spkid_gpios_mapping[] = { + { "spk-id-gpios", &cs35l56_af01_first_gpio, 1 }, + { } +}; + +static void cs35l56_acpi_dev_release_driver_gpios(void *adev) +{ + acpi_dev_remove_driver_gpios(adev); +} + +static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l56) +{ + struct fwnode_handle *af01_fwnode; + const union acpi_object *obj; + struct gpio_desc *desc; + int ret; + + /* Find the SDCA node containing the GpioIo */ + af01_fwnode = device_get_named_child_node(cs35l56->base.dev, "AF01"); + if (!af01_fwnode) { + dev_dbg(cs35l56->base.dev, "No AF01 node\n"); + return -ENOENT; + } + + ret = acpi_dev_get_property(ACPI_COMPANION(cs35l56->base.dev), + "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj); + if (ret) { + dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret); + return -ENOENT; + } + + /* The broken properties we can handle are a 4-element package (one GPIO) */ + if (obj->package.count != 4) { + dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n", + obj->package.count); + return -ENOENT; + } + + /* Add a GPIO mapping if it doesn't already have one */ + if (!fwnode_property_present(af01_fwnode, "spk-id-gpios")) { + struct acpi_device *adev = to_acpi_device_node(af01_fwnode); + + /* + * Can't use devm_acpi_dev_add_driver_gpios() because the + * mapping isn't being added to the node pointed to by + * ACPI_COMPANION(). + */ + ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping); + if (ret) { + return dev_err_probe(cs35l56->base.dev, ret, + "Failed to add gpio mapping to AF01\n"); + } + + ret = devm_add_action_or_reset(cs35l56->base.dev, + cs35l56_acpi_dev_release_driver_gpios, + adev); + if (ret) + return ret; + + dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n"); + } + + desc = fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL); + if (IS_ERR(desc)) { + ret = PTR_ERR(desc); + return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n"); + } + + ret = gpiod_get_value_cansleep(desc); + gpiod_put(desc); + + if (ret < 0) { + dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n"); + return ret; + } + + dev_info(cs35l56->base.dev, "Got spk-id from AF01\n"); + + return ret; +} + int cs35l56_common_probe(struct cs35l56_private *cs35l56) { int ret; @@ -1304,6 +1394,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) } ret = cs35l56_get_speaker_id(&cs35l56->base); + if (ACPI_COMPANION(cs35l56->base.dev) && cs35l56->sdw_peripheral && (ret == -ENOENT)) + ret = cs35l56_try_get_broken_sdca_spkid_gpio(cs35l56); + if ((ret < 0) && (ret != -ENOENT)) goto err; -- GitLab From 2127c604383666675789fd4a5fc2aead46c73aad Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 2 Feb 2024 17:32:20 +0100 Subject: [PATCH 1046/1405] xsk: Add truesize to skb_add_rx_frag(). xsk_build_skb() allocates a page and adds it to the skb via skb_add_rx_frag() and specifies 0 for truesize. This leads to a warning in skb_add_rx_frag() with CONFIG_DEBUG_NET enabled because size is larger than truesize. Increasing truesize requires to add the same amount to socket's sk_wmem_alloc counter in order not to underflow the counter during release in the destructor (sock_wfree()). Pass the size of the allocated page as truesize to skb_add_rx_frag(). Add this mount to socket's sk_wmem_alloc counter. Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20240202163221.2488589-1-bigeasy@linutronix.de --- net/xdp/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1eadfac03cc41..b78c0e095e221 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, memcpy(vaddr, buffer, len); kunmap_local(vaddr); - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); } if (first_frag && desc->options & XDP_TX_METADATA) { -- GitLab From 8d30528a170905ede9ab6ab81f229e441808590b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 12 Feb 2024 23:58:24 -0800 Subject: [PATCH 1047/1405] nvmet: remove superfluous initialization Remove superfluous initialization of status variable in nvmet_execute_admin_connect() and nvmet_execute_io_connect(), since it will get overwritten by nvmet_copy_from_sgl(). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d8da840a1c0ed..9964ffe347d2a 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -209,7 +209,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmf_connect_command *c = &req->cmd->connect; struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl = NULL; - u16 status = 0; + u16 status; int ret; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) @@ -290,7 +290,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl; u16 qid = le16_to_cpu(c->qid); - u16 status = 0; + u16 status; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; -- GitLab From 4e07447503f01766ae976207eb3b947437ed8dbc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Feb 2024 21:01:40 -0500 Subject: [PATCH 1048/1405] bcachefs: Clamp replicas_required to replicas This prevents going emergency read only when the user has specified replicas_required > replicas. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 12 ++++++++++++ fs/bcachefs/btree_update_interior.c | 3 ++- fs/bcachefs/io_write.c | 1 + fs/bcachefs/journal_io.c | 4 +++- fs/bcachefs/journal_reclaim.c | 2 +- fs/bcachefs/super.c | 4 ++-- 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b80c6c9efd8ce..69d0d60d50e36 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1249,6 +1249,18 @@ static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) return stdio; } +static inline unsigned metadata_replicas_required(struct bch_fs *c) +{ + return min(c->opts.metadata_replicas, + c->opts.metadata_replicas_required); +} + +static inline unsigned data_replicas_required(struct bch_fs *c) +{ + return min(c->opts.data_replicas, + c->opts.data_replicas_required); +} + #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 17a5938aa71a6..4530b14ff2c37 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -280,7 +280,8 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, writepoint_ptr(&c->btree_write_point), &devs_have, res->nr_replicas, - c->opts.metadata_replicas_required, + min(res->nr_replicas, + c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) return ERR_PTR(ret); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index ef3a53f9045af..2c098ac017b30 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1564,6 +1564,7 @@ CLOSURE_CALLBACK(bch2_write) BUG_ON(!op->write_point.v); BUG_ON(bkey_eq(op->pos, POS_MAX)); + op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas); op->start_time = local_clock(); bch2_keylist_init(&op->insert_keys, op->inline_keys); wbio_init(bio)->put_bio = false; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index bfd6585e746da..47805193f18cc 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1478,6 +1478,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) c->opts.foreground_target; unsigned i, replicas = 0, replicas_want = READ_ONCE(c->opts.metadata_replicas); + unsigned replicas_need = min_t(unsigned, replicas_want, + READ_ONCE(c->opts.metadata_replicas_required)); rcu_read_lock(); retry: @@ -1526,7 +1528,7 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); - return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; + return replicas >= replicas_need ? 0 : -EROFS; } static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 820d25e19e5fe..2cf626315652c 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -205,7 +205,7 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; - if (nr_online < c->opts.metadata_replicas_required) { + if (nr_online < metadata_replicas_required(c)) { ret = JOURNAL_ERR_insufficient_devices; goto out; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b9911402b1753..6b23e11825e6d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1428,10 +1428,10 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) ? c->opts.metadata_replicas - : c->opts.metadata_replicas_required, + : metadata_replicas_required(c), !(flags & BCH_FORCE_IF_DATA_DEGRADED) ? c->opts.data_replicas - : c->opts.data_replicas_required); + : data_replicas_required(c)); return nr_rw >= required; case BCH_MEMBER_STATE_failed: -- GitLab From 2eeccee86dc75a584a8c7e67a8b824d5168c978f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 12 Feb 2024 20:05:48 -0500 Subject: [PATCH 1049/1405] bcachefs: Fix check_version_upgrade() When also downgrading, check_version_upgrade() could pick a new version greater than the latest supported version. Fixes: Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9127d0e3ca2f6..21e13bb4335be 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -577,8 +577,9 @@ u64 bch2_recovery_passes_from_stable(u64 v) static bool check_version_upgrade(struct bch_fs *c) { - unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; + unsigned latest_compatible = min(latest_version, + bch2_latest_compatible_version(c->sb.version)); unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; @@ -597,7 +598,7 @@ static bool check_version_upgrade(struct bch_fs *c) new_version = latest_version; break; case BCH_VERSION_UPGRADE_none: - new_version = old_version; + new_version = min(old_version, latest_version); break; } } @@ -774,7 +775,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!(c->opts.nochanges && c->opts.norecovery)) { + if (!c->opts.nochanges) { mutex_lock(&c->sb_lock); bool write_sb = false; @@ -804,7 +805,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (bch2_check_version_downgrade(c)) { struct printbuf buf = PRINTBUF; - prt_str(&buf, "Version downgrade required:\n"); + prt_str(&buf, "Version downgrade required:"); __le64 passes = ext->recovery_passes_required[0]; bch2_sb_set_downgrade(c, @@ -812,7 +813,7 @@ int bch2_fs_recovery(struct bch_fs *c) BCH_VERSION_MINOR(c->sb.version)); passes = ext->recovery_passes_required[0] & ~passes; if (passes) { - prt_str(&buf, " running recovery passes: "); + prt_str(&buf, "\n running recovery passes: "); prt_bitflags(&buf, bch2_recovery_passes, bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } -- GitLab From 816054f40a5fdaaed05d9e7f603d2824eeee7e62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 13 Feb 2024 20:26:09 -0500 Subject: [PATCH 1050/1405] bcachefs: Fix missing va_end() Fixes: https://lore.kernel.org/linux-bcachefs/202402131603.E953E2CF@keescook/T/#u Reported-by: coverity scan Signed-off-by: Kent Overstreet --- fs/bcachefs/printbuf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index accf246c32330..b27d22925929a 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -56,6 +56,7 @@ void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args) va_copy(args2, args); len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2); + va_end(args2); } while (len + 1 >= printbuf_remaining(out) && !bch2_printbuf_make_room(out, len + 1)); -- GitLab From 1fba2bf8e9d5a27b7394856181b6200de7260b79 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 14 Feb 2024 11:00:41 +1100 Subject: [PATCH 1051/1405] Revert "powerpc/pseries/iommu: Fix iommu initialisation during DLPAR add" This reverts commit ed8b94f6e0acd652ce69bd69d678a0c769172df8. Gaurav reported that there are still problems with the patch and it should be reverted pending a fuller fix. Link: https://lore.kernel.org/all/4f6fc1ac-7a76-4447-9d0e-f55c0be373f8@linux.ibm.com/ Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-pci.h | 3 --- arch/powerpc/kernel/iommu.c | 21 +++++---------------- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ---- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index c3a3f3df36d16..ce2b1b5eebddc 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -29,9 +29,6 @@ void *pci_traverse_device_nodes(struct device_node *start, void *(*fn)(struct device_node *, void *), void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); -extern void ppc_iommu_register_device(struct pci_controller *phb); -extern void ppc_iommu_unregister_device(struct pci_controller *phb); - /* From rtas_pci.h */ extern void init_pci_config_tokens (void); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c6f62e130d55c..ebe259bdd4629 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1388,21 +1388,6 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; -void ppc_iommu_register_device(struct pci_controller *phb) -{ - iommu_device_sysfs_add(&phb->iommu, phb->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - phb->global_number); - iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, - phb->parent); -} - -void ppc_iommu_unregister_device(struct pci_controller *phb) -{ - iommu_device_unregister(&phb->iommu); - iommu_device_sysfs_remove(&phb->iommu); -} - /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1413,7 +1398,11 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - ppc_iommu_register_device(hose); + iommu_device_sysfs_add(&hose->iommu, hose->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + hose->global_number); + iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, + hose->parent); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4448386268d99..4ba8245681192 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,8 +35,6 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); - ppc_iommu_register_device(phb); - /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -78,8 +76,6 @@ int remove_phb_dynamic(struct pci_controller *phb) } } - ppc_iommu_unregister_device(phb); - pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ -- GitLab From f1acb109505d983779bbb7e20a1ee6244d2b5736 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Feb 2024 17:42:44 +1100 Subject: [PATCH 1052/1405] powerpc/kasan: Limit KASAN thread size increase to 32KB KASAN is seen to increase stack usage, to the point that it was reported to lead to stack overflow on some 32-bit machines (see link). To avoid overflows the stack size was doubled for KASAN builds in commit 3e8635fb2e07 ("powerpc/kasan: Force thread size increase with KASAN"). However with a 32KB stack size to begin with, the doubling leads to a 64KB stack, which causes build errors: arch/powerpc/kernel/switch.S:249: Error: operand out of range (0x000000000000fe50 is not between 0xffffffffffff8000 and 0x0000000000007fff) Although the asm could be reworked, in practice a 32KB stack seems sufficient even for KASAN builds - the additional usage seems to be in the 2-3KB range for a 64-bit KASAN build. So only increase the stack for KASAN if the stack size is < 32KB. Fixes: 18f14afe2816 ("powerpc/64s: Increase default stack size to 32KB") Reported-by: Spoorthy Reported-by: Benjamin Gray Reviewed-by: Benjamin Gray Link: https://lore.kernel.org/linuxppc-dev/bug-207129-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240212064244.3924505-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index bf5dde1a41147..15c5691dd2184 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT -- GitLab From eb6d871f4ba49ac8d0537e051fe983a3a4027f61 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 7 Feb 2024 10:27:58 +0100 Subject: [PATCH 1053/1405] powerpc/cputable: Add missing PPC_FEATURE_BOOKE on PPC64 Book-E Commit e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") moved the cpu_specs to separate header files. Previously PPC_FEATURE_BOOKE was enabled by CONFIG_PPC_BOOK3E_64. The definition in cpu_specs_e500mc.h for PPC64 no longer enables PPC_FEATURE_BOOKE. This breaks user space reading the ELF hwcaps and expect PPC_FEATURE_BOOKE. Debugging an application with gdb is no longer working on e5500/e6500 because the 64-bit detection relies on PPC_FEATURE_BOOKE for Book-E. Fixes: e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: David Engraf Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20240207092758.1058893-1-david.engraf@sysgo.com --- arch/powerpc/kernel/cpu_specs_e500mc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f831..2ae8e9a7b461c 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) -- GitLab From ea73179e64131bcd29ba6defd33732abdf8ca14b Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Tue, 13 Feb 2024 23:24:10 +0530 Subject: [PATCH 1054/1405] powerpc/ftrace: Ignore ftrace locations in exit text sections Michael reported that we are seeing an ftrace bug on bootup when KASAN is enabled and we are using -fpatchable-function-entry: ftrace: allocating 47780 entries in 18 pages ftrace-powerpc: 0xc0000000020b3d5c: No module provided for non-kernel address ------------[ ftrace bug ]------------ ftrace faulted on modifying [] 0xc0000000020b3d5c Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c00000000008cef4 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/trace/ftrace.c:2180 ftrace_bug+0x3c0/0x424 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.5.0-rc3-00120-g0f71dcfb4aef #860 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c0000000003aa81c LR: c0000000003aa818 CTR: 0000000000000000 REGS: c0000000033cfab0 TRAP: 0700 Not tainted (6.5.0-rc3-00120-g0f71dcfb4aef) MSR: 8000000002021033 CR: 28028240 XER: 00000000 CFAR: c0000000002781a8 IRQMASK: 3 ... NIP [c0000000003aa81c] ftrace_bug+0x3c0/0x424 LR [c0000000003aa818] ftrace_bug+0x3bc/0x424 Call Trace: ftrace_bug+0x3bc/0x424 (unreliable) ftrace_process_locs+0x5f4/0x8a0 ftrace_init+0xc0/0x1d0 start_kernel+0x1d8/0x484 With CONFIG_FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY=y and CONFIG_KASAN=y, compiler emits nops in functions that it generates for registering and unregistering global variables (unlike with -pg and -mprofile-kernel where calls to _mcount() are not generated in those functions). Those functions then end up in INIT_TEXT and EXIT_TEXT respectively. We don't expect to see any profiled functions in EXIT_TEXT, so ftrace_init_nop() assumes that all addresses that aren't in the core kernel text belongs to a module. Since these functions do not match that criteria, we see the above bug. Address this by having ftrace ignore all locations in the text exit sections of vmlinux. Fixes: 0f71dcfb4aef ("powerpc/ftrace: Add support for -fpatchable-function-entry") Cc: stable@vger.kernel.org # v6.6+ Reported-by: Michael Ellerman Signed-off-by: Naveen N Rao Reviewed-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20240213175410.1091313-1-naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 10 ++-------- arch/powerpc/include/asm/sections.h | 1 + arch/powerpc/kernel/trace/ftrace.c | 12 ++++++++++++ arch/powerpc/kernel/trace/ftrace_64_pg.c | 5 +++++ arch/powerpc/kernel/vmlinux.lds.S | 2 ++ 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 1ebd2ca97f120..107fc5a484569 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -20,14 +20,6 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) - addr += MCOUNT_INSN_SIZE; - - return addr; -} - unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp); @@ -142,8 +134,10 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #ifdef CONFIG_FUNCTION_TRACER extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; void ftrace_free_init_tramp(void); +unsigned long ftrace_call_adjust(unsigned long addr); #else static inline void ftrace_free_init_tramp(void) { } +static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index ea26665f82cfc..f43f3a6b0051c 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -14,6 +14,7 @@ typedef struct func_desc func_desc_t; extern char __head_end[]; extern char __srwx_boundary[]; +extern char __exittext_begin[], __exittext_end[]; /* Patch sites */ extern s32 patch__call_flush_branch_caches1; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 82010629cf887..d8d6b4fd9a14c 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -27,10 +27,22 @@ #include #include #include +#include #define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +unsigned long ftrace_call_adjust(unsigned long addr) +{ + if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) + return 0; + + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + addr += MCOUNT_INSN_SIZE; + + return addr; +} + static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link) { ppc_inst_t op; diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 7b85c3b460a3c..12fab1803bcf4 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -37,6 +37,11 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 1c5970df32336..f420df7888a75 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,7 +281,9 @@ SECTIONS * to deal with references from __bug_table */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + __exittext_begin = .; EXIT_TEXT + __exittext_end = .; } . = ALIGN(PAGE_SIZE); -- GitLab From cbecc9fcbbec60136b0180ba0609c829afed5c81 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Tue, 13 Feb 2024 10:56:35 +0530 Subject: [PATCH 1055/1405] powerpc/pseries: fix accuracy of stolen time powerVM hypervisor updates the VPA fields with stolen time data. It currently reports enqueue_dispatch_tb and ready_enqueue_tb for this purpose. In linux these two fields are used to report the stolen time. The VPA fields are updated at the TB frequency. On powerPC its mostly set at 512Mhz. Hence this needs a conversion to ns when reporting it back as rest of the kernel timings are in ns. This conversion is already handled in tb_to_ns function. So use that function to report accurate stolen time. Observed this issue and used an Capped Shared Processor LPAR(SPLPAR) to simplify the experiments. In all these cases, 100% VP Load is run using stress-ng workload. Values of stolen time is in percentages as reported by mpstat. With the patch values are close to expected. 6.8.rc1 +Patch 12EC/12VP 0.0 0.0 12EC/24VP 25.7 50.2 12EC/36VP 37.3 69.2 12EC/48VP 38.5 78.3 Fixes: 0e8a63132800 ("powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Shrikanth Hegde Reviewed-by: Nicholas Piggin Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20240213052635.231597-1-sshegde@linux.ibm.com --- arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4561667832ed4..4e9916bb03d71 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -662,8 +662,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif -- GitLab From ce6b6d1513965f500a05f3facf223fa01fd74920 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 13 Feb 2024 19:45:40 +0000 Subject: [PATCH 1056/1405] riscv: dts: sifive: add missing #interrupt-cells to pmic At W=2 dtc complains: hifive-unmatched-a00.dts:120.10-238.4: Warning (interrupt_provider): /soc/i2c@10030000/pmic@58: Missing '#interrupt-cells' in interrupt provider Add the missing property. Reviewed-by: Samuel Holland Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 07387f9c135ca..72b87b08ab444 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -123,6 +123,7 @@ interrupt-parent = <&gpio>; interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; -- GitLab From 706c1fa1ab09f11a131fc4d699ce4c0224b1cb2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= Date: Wed, 14 Feb 2024 00:38:31 +0100 Subject: [PATCH 1057/1405] ALSA: hda/realtek: cs35l41: Add internal speaker support for ASUS UM3402 with missing DSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the values for the missing DSD properties to the cs35l41 config table. Signed-off-by: Jean-Loïc Charroud Link: https://lore.kernel.org/r/1435594585.650325975.1707867511062.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 57b21285ab6a6..87dcb367e2391 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -91,6 +91,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431D1F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431DA2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "10431E02", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "10431E12", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "10431EE2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, @@ -428,6 +429,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431D1F", generic_dsd_config }, { "CSC3551", "10431DA2", generic_dsd_config }, { "CSC3551", "10431E02", generic_dsd_config }, + { "CSC3551", "10431E12", generic_dsd_config }, { "CSC3551", "10431EE2", generic_dsd_config }, { "CSC3551", "10431F12", generic_dsd_config }, { "CSC3551", "10431F1F", generic_dsd_config }, -- GitLab From b91050448897663b60b6d15525c8c3ecae28a368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= Date: Wed, 14 Feb 2024 00:42:12 +0100 Subject: [PATCH 1058/1405] ALSA: hda/realtek: cs35l41: Fix device ID / model name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch 51d976079976c800ef19ed1b542602fcf63f0edb ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") modified the entry 1043:1e2e from "ASUS UM3402" to "ASUS UM6702RA/RC" and added another entry for "ASUS UM3402" with 104e:1ee2. The first entry was correct, while the new one corresponds to model "ASUS UM6702RA/RC" Fix the model names for both devices. Fixes: 51d976079976 ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") Signed-off-by: Jean-Loïc Charroud Link: https://lore.kernel.org/r/1656546983.650349575.1707867732866.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 98886b803bc55..c8adef01e9ec3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10051,11 +10051,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), - SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), -- GitLab From 852d432a14dbcd34e15a3a3910c5c6869a6d1929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= Date: Wed, 14 Feb 2024 00:44:24 +0100 Subject: [PATCH 1059/1405] ALSA: hda/realtek: cs35l41: Fix order and duplicates in quirks table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move entry {0x1043, 0x16a3, "ASUS UX3402VA"} following device ID order. Remove duplicate entry for device {0x1043, 0x1f62, "ASUS UX7602ZM"}. Fixes: 51d976079976 ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") Signed-off-by: Jean-Loïc Charroud Link: https://lore.kernel.org/r/1969151851.650354669.1707867864074.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c8adef01e9ec3..5d0517465f085 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10005,6 +10005,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), @@ -10048,8 +10049,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), -- GitLab From eb5c7465c3240151cd42a55c7ace9da0026308a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:07:13 +0100 Subject: [PATCH 1060/1405] RDMA/srpt: fix function pointer cast warnings clang-16 notices that srpt_qp_event() gets called through an incompatible pointer here: drivers/infiniband/ulp/srpt/ib_srpt.c:1815:5: error: cast from 'void (*)(struct ib_event *, struct srpt_rdma_ch *)' to 'void (*)(struct ib_event *, void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1815 | = (void(*)(struct ib_event *, void*))srpt_qp_event; Change srpt_qp_event() to use the correct prototype and adjust the argument inside of it. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213100728.458348-1-arnd@kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index d2dce6ce30a94..040234c01be4d 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1811,8 +1813,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; -- GitLab From 858b31133dbec88465bcc0a006f4dc43173662b8 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 12 Feb 2024 00:30:38 +0530 Subject: [PATCH 1061/1405] octeontx2-af: Remove the PF_FUNC validation for NPC transmit rules NPC transmit side mcam rules can use the pcifunc (in packet metadata added by hardware) of transmitting device for mcam lookup similar to the channel of receiving device at receive side. The commit 18603683d766 ("octeontx2-af: Remove channel verification while installing MCAM rules") removed the receive side channel verification to save hardware MCAM filters while switching packets across interfaces but missed removing transmit side checks. This patch removes transmit side rules validation. Fixes: 18603683d766 ("octeontx2-af: Remove channel verification while installing MCAM rules") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 8cfd74ad991cc..e5d6156655ba4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -61,28 +61,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena) return 0; } -static int npc_mcam_verify_pf_func(struct rvu *rvu, - struct mcam_entry *entry_data, u8 intf, - u16 pcifunc) -{ - u16 pf_func, pf_func_mask; - - if (is_npc_intf_rx(intf)) - return 0; - - pf_func_mask = (entry_data->kw_mask[0] >> 32) & - NPC_KEX_PF_FUNC_MASK; - pf_func = (entry_data->kw[0] >> 32) & NPC_KEX_PF_FUNC_MASK; - - pf_func = be16_to_cpu((__force __be16)pf_func); - if (pf_func_mask != NPC_KEX_PF_FUNC_MASK || - ((pf_func & ~RVU_PFVF_FUNC_MASK) != - (pcifunc & ~RVU_PFVF_FUNC_MASK))) - return -EINVAL; - - return 0; -} - void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf) { int blkaddr; @@ -2851,12 +2829,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, else nix_intf = pfvf->nix_rx_intf; - if (!is_pffunc_af(pcifunc) && - npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) { - rc = NPC_MCAM_INVALID_REQ; - goto exit; - } - /* For AF installed rules, the nix_intf should be set to target NIX */ if (is_pffunc_af(req->hdr.pcifunc)) nix_intf = req->intf; @@ -3208,10 +3180,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu, if (!is_npc_interface_valid(rvu, req->intf)) return NPC_MCAM_INVALID_REQ; - if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, - req->hdr.pcifunc)) - return NPC_MCAM_INVALID_REQ; - /* Try to allocate a MCAM entry */ entry_req.hdr.pcifunc = req->hdr.pcifunc; entry_req.contig = true; -- GitLab From 4b085736e44dbbe69b5eea1a8a294f404678a1f4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 11 Jan 2024 20:51:22 +0900 Subject: [PATCH 1062/1405] ata: libata-core: Do not try to set sleeping devices to standby In ata ata_dev_power_set_standby(), check that the target device is not sleeping. If it is, there is no need to do anything. Fixes: aa3998dbeb3a ("ata: libata-scsi: Disable scsi device manage_system_start_stop") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 09ed67772fae4..d9f80f4f70f5b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2017,6 +2017,10 @@ void ata_dev_power_set_standby(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; + /* If the device is already sleeping, do nothing. */ + if (dev->flags & ATA_DFLAG_SLEEPING) + return; + /* * Some odd clown BIOSes issue spindown on power off (ACPI S4 or S5) * causing some drives to spin up and down again. For these, do nothing -- GitLab From 6cf9ff463317217d95732a6cce6fbdd12508921a Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 12 Feb 2024 17:34:02 +0300 Subject: [PATCH 1063/1405] net: smc: fix spurious error message from __sock_release() Commit 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") leaves the socket's fasync list pointer within a container socket as well. When the latter is destroyed, '__sock_release()' warns about its non-empty fasync list, which is a dangling pointer to previously freed fasync list of an underlying TCP socket. Fix this spurious warning by nullifying fasync list of a container socket. Fixes: 67f562e3e147 ("net/smc: transfer fasync_list in case of fallback") Signed-off-by: Dmitry Antipov Signed-off-by: David S. Miller --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a2cb30af46cb1..0f53a5c6fd9d9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -924,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + smc->sk.sk_socket->wq.fasync_list = NULL; /* There might be some wait entries remaining * in smc sk->sk_wq and they should be woken up -- GitLab From 5d07e432cb387b9f7d4f0e07245705744c7fb05b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:07:50 +0100 Subject: [PATCH 1064/1405] bnad: fix work_queue type mismatch clang-16 warns about a function pointer cast: drivers/net/ethernet/brocade/bna/bnad.c:1995:4: error: cast from 'void (*)(struct delayed_work *)' to 'work_func_t' (aka 'void (*)(struct work_struct *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1995 | (work_func_t)bnad_tx_cleanup); drivers/net/ethernet/brocade/bna/bnad.c:2252:4: error: cast from 'void (*)(void *)' to 'work_func_t' (aka 'void (*)(struct work_struct *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 2252 | (work_func_t)(bnad_rx_cleanup)); The problem here is mixing up work_struct and delayed_work, which relies the former being the first member of the latter. Change the code to use consistent types here to address the warning and make it more robust against workqueue interface changes. Side note: the use of a delayed workqueue for cleaning up TX descriptors is probably a bad idea since this introduces a noticeable delay. The driver currently does not appear to use BQL, but if one wanted to add that, this would have to be changed as well. Fixes: 01b54b145185 ("bna: tx rx cleanup fix") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 31191b520b587..c32174484a967 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -1091,10 +1091,10 @@ bnad_cb_tx_resume(struct bnad *bnad, struct bna_tx *tx) * Free all TxQs buffers and then notify TX_E_CLEANUP_DONE to Tx fsm. */ static void -bnad_tx_cleanup(struct delayed_work *work) +bnad_tx_cleanup(struct work_struct *work) { struct bnad_tx_info *tx_info = - container_of(work, struct bnad_tx_info, tx_cleanup_work); + container_of(work, struct bnad_tx_info, tx_cleanup_work.work); struct bnad *bnad = NULL; struct bna_tcb *tcb; unsigned long flags; @@ -1170,7 +1170,7 @@ bnad_cb_rx_stall(struct bnad *bnad, struct bna_rx *rx) * Free all RxQs buffers and then notify RX_E_CLEANUP_DONE to Rx fsm. */ static void -bnad_rx_cleanup(void *work) +bnad_rx_cleanup(struct work_struct *work) { struct bnad_rx_info *rx_info = container_of(work, struct bnad_rx_info, rx_cleanup_work); @@ -1991,8 +1991,7 @@ bnad_setup_tx(struct bnad *bnad, u32 tx_id) } tx_info->tx = tx; - INIT_DELAYED_WORK(&tx_info->tx_cleanup_work, - (work_func_t)bnad_tx_cleanup); + INIT_DELAYED_WORK(&tx_info->tx_cleanup_work, bnad_tx_cleanup); /* Register ISR for the Tx object */ if (intr_info->intr_type == BNA_INTR_T_MSIX) { @@ -2248,8 +2247,7 @@ bnad_setup_rx(struct bnad *bnad, u32 rx_id) rx_info->rx = rx; spin_unlock_irqrestore(&bnad->bna_lock, flags); - INIT_WORK(&rx_info->rx_cleanup_work, - (work_func_t)(bnad_rx_cleanup)); + INIT_WORK(&rx_info->rx_cleanup_work, bnad_rx_cleanup); /* * Init NAPI, so that state is set to NAPI_STATE_SCHED, -- GitLab From 9b23fceb4158a3636ce4a2bda28ab03dcfa6a26f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 11:16:34 +0100 Subject: [PATCH 1065/1405] ethernet: cpts: fix function pointer cast warnings clang-16 warns about the mismatched prototypes for the devm_* callbacks: drivers/net/ethernet/ti/cpts.c:691:12: error: cast from 'void (*)(struct clk_hw *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 691 | (void(*)(void *))clk_hw_unregister_mux, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/device.h:406:34: note: expanded from macro 'devm_add_action_or_reset' 406 | __devm_add_action_or_reset(dev, action, data, #action) | ^~~~~~ drivers/net/ethernet/ti/cpts.c:703:12: error: cast from 'void (*)(struct device_node *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 703 | (void(*)(void *))of_clk_del_provider, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/device.h:406:34: note: expanded from macro 'devm_add_action_or_reset' 406 | __devm_add_action_or_reset(dev, action, data, #action) Use separate helper functions for this instead, using the expected prototypes with a void* argument. Fixes: a3047a81ba13 ("net: ethernet: ti: cpts: add support for ext rftclk selection") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpts.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index bcccf43d368b7..dbbea91460405 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -638,6 +638,16 @@ static void cpts_calc_mult_shift(struct cpts *cpts) freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC)); } +static void cpts_clk_unregister(void *clk) +{ + clk_hw_unregister_mux(clk); +} + +static void cpts_clk_del_provider(void *np) +{ + of_clk_del_provider(np); +} + static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node) { struct device_node *refclk_np; @@ -687,9 +697,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node) goto mux_fail; } - ret = devm_add_action_or_reset(cpts->dev, - (void(*)(void *))clk_hw_unregister_mux, - clk_hw); + ret = devm_add_action_or_reset(cpts->dev, cpts_clk_unregister, clk_hw); if (ret) { dev_err(cpts->dev, "add clkmux unreg action %d", ret); goto mux_fail; @@ -699,8 +707,7 @@ static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node) if (ret) goto mux_fail; - ret = devm_add_action_or_reset(cpts->dev, - (void(*)(void *))of_clk_del_provider, + ret = devm_add_action_or_reset(cpts->dev, cpts_clk_del_provider, refclk_np); if (ret) { dev_err(cpts->dev, "add clkmux provider unreg action %d", ret); -- GitLab From 6cdedc18ba7b9dacc36466e27e3267d201948c8d Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Fri, 21 Jul 2023 09:22:26 -0700 Subject: [PATCH 1066/1405] can: j1939: prevent deadlock by changing j1939_socks_lock to rwlock The following 3 locks would race against each other, causing the deadlock situation in the Syzbot bug report: - j1939_socks_lock - active_session_list_lock - sk_session_queue_lock A reasonable fix is to change j1939_socks_lock to an rwlock, since in the rare situations where a write lock is required for the linked list that j1939_socks_lock is protecting, the code does not attempt to acquire any more locks. This would break the circular lock dependency, where, for example, the current thread already locks j1939_socks_lock and attempts to acquire sk_session_queue_lock, and at the same time, another thread attempts to acquire j1939_socks_lock while holding sk_session_queue_lock. NOTE: This patch along does not fix the unregister_netdevice bug reported by Syzbot; instead, it solves a deadlock situation to prepare for one or more further patches to actually fix the Syzbot bug, which appears to be a reference counting problem within the j1939 codebase. Reported-by: Signed-off-by: Ziqi Zhao Reviewed-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20230721162226.8639-1-astrajoan@yahoo.com [mkl: remove unrelated newline change] Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/j1939-priv.h | 2 +- net/can/j1939/main.c | 2 +- net/can/j1939/socket.c | 24 ++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f80f60..74f15592d1708 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947d683..a6fb89fa62785 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 14c4316632339..94cfc2315e546 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -329,13 +329,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +344,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -1080,12 +1080,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1273,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1281,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, -- GitLab From efe7cf828039aedb297c1f9920b638fffee6aabc Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 20 Oct 2023 15:38:14 +0200 Subject: [PATCH 1067/1405] can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER) Lock jsk->sk to prevent UAF when setsockopt(..., SO_J1939_FILTER, ...) modifies jsk->filters while receiving packets. Following trace was seen on affected system: ================================================================== BUG: KASAN: slab-use-after-free in j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] Read of size 4 at addr ffff888012144014 by task j1939/350 CPU: 0 PID: 350 Comm: j1939 Tainted: G W OE 6.5.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: print_report+0xd3/0x620 ? kasan_complete_mode_report_info+0x7d/0x200 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] kasan_report+0xc2/0x100 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] __asan_load4+0x84/0xb0 j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] j1939_sk_recv+0x20b/0x320 [can_j1939] ? __kasan_check_write+0x18/0x20 ? __pfx_j1939_sk_recv+0x10/0x10 [can_j1939] ? j1939_simple_recv+0x69/0x280 [can_j1939] ? j1939_ac_recv+0x5e/0x310 [can_j1939] j1939_can_recv+0x43f/0x580 [can_j1939] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] ? raw_rcv+0x42/0x3c0 [can_raw] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] can_rcv_filter+0x11f/0x350 [can] can_receive+0x12f/0x190 [can] ? __pfx_can_rcv+0x10/0x10 [can] can_rcv+0xdd/0x130 [can] ? __pfx_can_rcv+0x10/0x10 [can] __netif_receive_skb_one_core+0x13d/0x150 ? __pfx___netif_receive_skb_one_core+0x10/0x10 ? __kasan_check_write+0x18/0x20 ? _raw_spin_lock_irq+0x8c/0xe0 __netif_receive_skb+0x23/0xb0 process_backlog+0x107/0x260 __napi_poll+0x69/0x310 net_rx_action+0x2a1/0x580 ? __pfx_net_rx_action+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? handle_irq_event+0x7d/0xa0 __do_softirq+0xf3/0x3f8 do_softirq+0x53/0x80 __local_bh_enable_ip+0x6e/0x70 netif_rx+0x16b/0x180 can_send+0x32b/0x520 [can] ? __pfx_can_send+0x10/0x10 [can] ? __check_object_size+0x299/0x410 raw_sendmsg+0x572/0x6d0 [can_raw] ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] ? apparmor_socket_sendmsg+0x2f/0x40 ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] sock_sendmsg+0xef/0x100 sock_write_iter+0x162/0x220 ? __pfx_sock_write_iter+0x10/0x10 ? __rtnl_unlock+0x47/0x80 ? security_file_permission+0x54/0x320 vfs_write+0x6ba/0x750 ? __pfx_vfs_write+0x10/0x10 ? __fget_light+0x1ca/0x1f0 ? __rcu_read_unlock+0x5b/0x280 ksys_write+0x143/0x170 ? __pfx_ksys_write+0x10/0x10 ? __kasan_check_read+0x15/0x20 ? fpregs_assert_state_consistent+0x62/0x70 __x64_sys_write+0x47/0x60 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6d/0x90 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x79/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 348: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_alloc_info+0x1f/0x30 __kasan_kmalloc+0xb5/0xc0 __kmalloc_node_track_caller+0x67/0x160 j1939_sk_setsockopt+0x284/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 349: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_free_info+0x2f/0x50 __kasan_slab_free+0x12e/0x1c0 __kmem_cache_free+0x1b9/0x380 kfree+0x7a/0x120 j1939_sk_setsockopt+0x3b2/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Reported-by: Sili Luo Suggested-by: Sili Luo Acked-by: Oleksij Rempel Cc: stable@vger.kernel.org Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/all/20231020133814.383996-1-o.rempel@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/j1939/j1939-priv.h | 1 + net/can/j1939/socket.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 74f15592d1708..31a93cae5111b 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 94cfc2315e546..305dd72c844c7 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; -- GitLab From 2aa0a5e65eae27dbd96faca92c84ecbf6f492d42 Mon Sep 17 00:00:00 2001 From: Maxime Jayat Date: Mon, 6 Nov 2023 19:01:58 +0100 Subject: [PATCH 1068/1405] can: netlink: Fix TDCO calculation using the old data bittiming The TDCO calculation was done using the currently applied data bittiming, instead of the newly computed data bittiming, which means that the TDCO had an invalid value unless setting the same data bittiming twice. Fixes: d99755f71a80 ("can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC)") Signed-off-by: Maxime Jayat Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/all/40579c18-63c0-43a4-8d4c-f3a6c1c0b417@munic.io Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 036d85ef07f5b..dfdc039d92a6c 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -346,7 +346,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ - can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming, + can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing -- GitLab From 0846dd77c8349ec92ca0079c9c71d130f34cb192 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Tue, 13 Feb 2024 10:05:22 -0600 Subject: [PATCH 1069/1405] powerpc/iommu: Fix the missing iommu_group_put() during platform domain attach The function spapr_tce_platform_iommu_attach_dev() is missing to call iommu_group_put() when the domain is already set. This refcount leak shows up with BUG_ON() during DLPAR remove operation as: KernelBug: Kernel bug in state 'None': kernel BUG at arch/powerpc/platforms/pseries/iommu.c:100! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA pSeries Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_016) hv:phyp pSeries NIP: c0000000000ff4d4 LR: c0000000000ff4cc CTR: 0000000000000000 REGS: c0000013aed5f840 TRAP: 0700 Tainted: G I (6.8.0-rc3-autotest-g99bd3cb0d12e) MSR: 8000000000029033 CR: 44002402 XER: 20040000 CFAR: c000000000a0d170 IRQMASK: 0 ... NIP iommu_reconfig_notifier+0x94/0x200 LR iommu_reconfig_notifier+0x8c/0x200 Call Trace: iommu_reconfig_notifier+0x8c/0x200 (unreliable) notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 of_reconfig_notify+0x44/0xdc of_detach_node+0x78/0xb0 ofdt_write.part.0+0x86c/0xbb8 proc_reg_write+0xf4/0x150 vfs_write+0xf8/0x488 ksys_write+0x84/0x140 system_call_exception+0x138/0x330 system_call_vectored_common+0x15c/0x2ec The patch adds the missing iommu_group_put() call. Fixes: a8ca9fc9134c ("powerpc/iommu: Do not do platform domain attach atctions after probe") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/274e0d2b-b5cc-475e-94e6-8427e88e271d@linux.vnet.ibm.com/ Signed-off-by: Shivaprasad G Bhat Tested-by: Venkat Rao Bagalkote Reviewed-by: Jason Gunthorpe Signed-off-by: Michael Ellerman Link: https://msgid.link/170784021983.6249.10039296655906636112.stgit@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd4629..df17b33b89d13 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1290,8 +1290,10 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, int ret = -EINVAL; /* At first attach the ownership is already set */ - if (!domain) + if (!domain) { + iommu_group_put(grp); return 0; + } if (!grp) return -ENODEV; -- GitLab From 8746c6c9dfa31d269c65dd52ab42fde0720b7d91 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 14 Feb 2024 18:48:52 +0530 Subject: [PATCH 1070/1405] drm/buddy: Fix alloc_range() error handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Few users have observed display corruption when they boot the machine to KDE Plasma or playing games. We have root caused the problem that whenever alloc_range() couldn't find the required memory blocks the function was returning SUCCESS in some of the corner cases. The right approach would be if the total allocated size is less than the required size, the function should return -ENOSPC. Cc: # 6.7+ Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097 Tested-by: Mario Limonciello Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-Arunpravin.PaneerSelvam@amd.com/ Acked-by: Christian König Reviewed-by: Matthew Auld Signed-off-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240214131853.5934-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f57e6d74fb0e0..c1a99bf4dffd1 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm, } while (1); list_splice_tail(&allocated, blocks); + + if (total_allocated < size) { + err = -ENOSPC; + goto err_free; + } + return 0; err_undo: -- GitLab From a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 14 Feb 2024 18:48:53 +0530 Subject: [PATCH 1071/1405] drm/tests/drm_buddy: add alloc_contiguous test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION. v2: Fix checkpatch warnings. Signed-off-by: Matthew Auld Reviewed-by: Arunpravin Paneer Selvam Signed-off-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240214131853.5934-2-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index ea2af6bd9abeb..fee6bec757d1a 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -8,6 +8,7 @@ #include #include +#include #include @@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size) return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_contiguous(struct kunit *test) +{ + u64 mm_size, ps = SZ_4K, i, n_pages, total; + struct drm_buddy_block *block; + struct drm_buddy mm; + LIST_HEAD(left); + LIST_HEAD(middle); + LIST_HEAD(right); + LIST_HEAD(allocated); + + mm_size = 16 * 3 * SZ_4K; + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + /* + * Idea is to fragment the address space by alternating block + * allocations between three different lists; one for left, middle and + * right. We can then free a list to simulate fragmentation. In + * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, + * including the try_harder path. + */ + + i = 0; + n_pages = mm_size / ps; + do { + struct list_head *list; + int slot = i % 3; + + if (slot == 0) + list = &left; + else if (slot == 1) + list = &middle; + else + list = &right; + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, 0), + "buddy_alloc hit an error size=%d\n", + ps); + } while (++i < n_pages); + + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%d\n", 3 * ps); + + drm_buddy_free_list(&mm, &middle); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 3 * ps); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 2 * ps); + + drm_buddy_free_list(&mm, &right); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 3 * ps); + /* + * At this point we should have enough contiguous space for 2 blocks, + * however they are never buddies (since we freed middle and right) so + * will require the try_harder logic to find them. + */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%d\n", 2 * ps); + + drm_buddy_free_list(&mm, &left); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%d\n", 3 * ps); + + total = 0; + list_for_each_entry(block, &allocated, link) + total += drm_buddy_block_size(&mm, block); + + KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_pathological(struct kunit *test) { u64 mm_size, size, start = 0; @@ -280,6 +368,7 @@ static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), + KUNIT_CASE(drm_test_buddy_alloc_contiguous), {} }; -- GitLab From e20f378d993b1034eebe3ae78e67f3ed10e75356 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Feb 2024 16:34:54 +0000 Subject: [PATCH 1072/1405] nvmem: include bit index in cell sysfs file name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating sysfs files for all Cells caused a boot failure for linux-6.8-rc1 on Apple M1, which (in downstream dts files) has multiple nvmem cells that use the same byte address. This causes the device probe to fail with [ 0.605336] sysfs: cannot create duplicate filename '/devices/platform/soc@200000000/2922bc000.efuse/apple_efuses_nvmem0/cells/efuse@a10' [ 0.605347] CPU: 7 PID: 1 Comm: swapper/0 Tainted: G S 6.8.0-rc1-arnd-5+ #133 [ 0.605355] Hardware name: Apple Mac Studio (M1 Ultra, 2022) (DT) [ 0.605362] Call trace: [ 0.605365] show_stack+0x18/0x2c [ 0.605374] dump_stack_lvl+0x60/0x80 [ 0.605383] dump_stack+0x18/0x24 [ 0.605388] sysfs_warn_dup+0x64/0x80 [ 0.605395] sysfs_add_bin_file_mode_ns+0xb0/0xd4 [ 0.605402] internal_create_group+0x268/0x404 [ 0.605409] sysfs_create_groups+0x38/0x94 [ 0.605415] devm_device_add_groups+0x50/0x94 [ 0.605572] nvmem_populate_sysfs_cells+0x180/0x1b0 [ 0.605682] nvmem_register+0x38c/0x470 [ 0.605789] devm_nvmem_register+0x1c/0x6c [ 0.605895] apple_efuses_probe+0xe4/0x120 [ 0.606000] platform_probe+0xa8/0xd0 As far as I can tell, this is a problem for any device with multiple cells on different bits of the same address. Avoid the issue by changing the file name to include the first bit number. Fixes: 0331c611949f ("nvmem: core: Expose cells through sysfs") Link: https://github.com/AsahiLinux/linux/blob/bd0a1a7d4/arch/arm64/boot/dts/apple/t600x-dieX.dtsi#L156 Cc: Cc: Miquel Raynal Cc: Rafał Miłecki Cc: Chen-Yu Tsai Cc: Srinivas Kandagatla Cc: Greg Kroah-Hartman Cc: Cc: Sven Peter Signed-off-by: Arnd Bergmann Signed-off-by: Srinivas Kandagatla Reviewed-by: Eric Curtin Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20240209163454.98051-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-nvmem-cells | 16 ++++++++-------- drivers/nvmem/core.c | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells index 7af70adf3690e..c7c9444f92a88 100644 --- a/Documentation/ABI/testing/sysfs-nvmem-cells +++ b/Documentation/ABI/testing/sysfs-nvmem-cells @@ -4,18 +4,18 @@ KernelVersion: 6.5 Contact: Miquel Raynal Description: The "cells" folder contains one file per cell exposed by the - NVMEM device. The name of the file is: @, with - being the cell name and its location in the NVMEM - device, in hexadecimal (without the '0x' prefix, to mimic device - tree node names). The length of the file is the size of the cell - (when known). The content of the file is the binary content of - the cell (may sometimes be ASCII, likely without trailing - character). + NVMEM device. The name of the file is: "@,", + with being the cell name and its location in + the NVMEM device, in hexadecimal bytes and bits (without the + '0x' prefix, to mimic device tree node names). The length of + the file is the size of the cell (when known). The content of + the file is the binary content of the cell (may sometimes be + ASCII, likely without trailing character). Note: This file is only present if CONFIG_NVMEM_SYSFS is enabled. Example:: - hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d + hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0 00000000 54 4e 34 38 4d 2d 50 2d 44 4e |TN48M-P-DN| 0000000a diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 980123fb4dde0..eb357ac2e54a2 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -460,8 +460,9 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem) list_for_each_entry(entry, &nvmem->cells, node) { sysfs_bin_attr_init(&attrs[i]); attrs[i].attr.name = devm_kasprintf(&nvmem->dev, GFP_KERNEL, - "%s@%x", entry->name, - entry->offset); + "%s@%x,%x", entry->name, + entry->offset, + entry->bit_offset); attrs[i].attr.mode = 0444; attrs[i].size = entry->bytes; attrs[i].read = &nvmem_cell_attr_read; -- GitLab From e37243b65d528a8a9f8b9a57a43885f8e8dfc15c Mon Sep 17 00:00:00 2001 From: Gianmarco Lusvardi Date: Tue, 13 Feb 2024 23:05:46 +0000 Subject: [PATCH 1073/1405] bpf, scripts: Correct GPL license name The bpf_doc script refers to the GPL as the "GNU Privacy License". I strongly suspect that the author wanted to refer to the GNU General Public License, under which the Linux kernel is released, as, to the best of my knowledge, there is no license named "GNU Privacy License". This patch corrects the license name in the script accordingly. Fixes: 56a092c89505 ("bpf: add script and prepare bpf.h for new helpers documentation") Signed-off-by: Gianmarco Lusvardi Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20240213230544.930018-3-glusvardi@posteo.net --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 61b7dddedc461..0669bac5e900e 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\\ () system call, and this -- GitLab From 2394ac4145ea91b92271e675a09af2a9ea6840b7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 14 Feb 2024 11:20:46 -0500 Subject: [PATCH 1074/1405] tracing: Inform kmemleak of saved_cmdlines allocation The allocation of the struct saved_cmdlines_buffer structure changed from: s = kmalloc(sizeof(*s), GFP_KERNEL); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); to: orig_size = sizeof(*s) + val * TASK_COMM_LEN; order = get_order(orig_size); size = 1 << (order + PAGE_SHIFT); page = alloc_pages(GFP_KERNEL, order); if (!page) return NULL; s = page_address(page); memset(s, 0, sizeof(*s)); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); Where that s->saved_cmdlines allocation looks to be a dangling allocation to kmemleak. That's because kmemleak only keeps track of kmalloc() allocations. For allocations that use page_alloc() directly, the kmemleak needs to be explicitly informed about it. Add kmemleak_alloc() and kmemleak_free() around the page allocation so that it doesn't give the following false positive: unreferenced object 0xffff8881010c8000 (size 32760): comm "swapper", pid 0, jiffies 4294667296 hex dump (first 32 bytes): ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ backtrace (crc ae6ec1b9): [] kmemleak_alloc+0x45/0x80 [] __kmalloc_large_node+0x10d/0x190 [] __kmalloc+0x3b1/0x4c0 [] allocate_cmdlines_buffer+0x113/0x230 [] tracer_alloc_buffers.isra.0+0x124/0x460 [] early_trace_init+0x14/0xa0 [] start_kernel+0x12e/0x3c0 [] x86_64_start_reservations+0x18/0x30 [] x86_64_start_kernel+0x7b/0x80 [] secondary_startup_64_no_verify+0x15e/0x16b Link: https://lore.kernel.org/linux-trace-kernel/87r0hfnr9r.fsf@kernel.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240214112046.09a322d6@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Catalin Marinas Fixes: 44dc5c41b5b1 ("tracing: Fix wasted memory in saved_cmdlines logic") Reported-by: Kalle Valo Tested-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aa54810e8b569..8198bfc54b58d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -2339,6 +2340,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); kfree(s->map_cmdline_to_pid); + kmemleak_free(s); free_pages((unsigned long)s, order); } @@ -2358,6 +2360,7 @@ static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) return NULL; s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); memset(s, 0, sizeof(*s)); /* Round up to actual allocation */ -- GitLab From c56d055893cbe97848611855d1c97d0ab171eccc Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 15 Jan 2024 16:28:25 +0800 Subject: [PATCH 1075/1405] igb: Fix string truncation warnings in igb_set_fw_version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1978d3ead82c ("intel: fix string truncation warnings") fixes '-Wformat-truncation=' warnings in igb_main.c by using kasprintf. drivers/net/ethernet/intel/igb/igb_main.c:3092:53: warning:‘%d’ directive output may be truncated writing between 1 and 5 bytes into a region of size between 1 and 13 [-Wformat-truncation=] 3092 | "%d.%d, 0x%08x, %d.%d.%d", | ^~ drivers/net/ethernet/intel/igb/igb_main.c:3092:34: note:directive argument in the range [0, 65535] 3092 | "%d.%d, 0x%08x, %d.%d.%d", | ^~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/intel/igb/igb_main.c:3092:34: note:directive argument in the range [0, 65535] drivers/net/ethernet/intel/igb/igb_main.c:3090:25: note:‘snprintf’ output between 23 and 43 bytes into a destination of size 32 kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Fix this warning by using a larger space for adapter->fw_version, and then fall back and continue to use snprintf. Fixes: 1978d3ead82c ("intel: fix string truncation warnings") Signed-off-by: Kunwu Chan Cc: Kunwu Chan Suggested-by: Jakub Kicinski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 35 ++++++++++++----------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index a2b759531cb7b..3c2dc7bdebb50 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -637,7 +637,7 @@ struct igb_adapter { struct timespec64 period; } perout[IGB_N_PEROUT]; - char fw_version[32]; + char fw_version[48]; #ifdef CONFIG_IGB_HWMON struct hwmon_buff *igb_hwmon_buff; bool ets; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 4df8d4153aa5f..cebb44f51d5f5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3069,7 +3069,6 @@ void igb_set_fw_version(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct e1000_fw_version fw; - char *lbuf; igb_get_fw_version(hw, &fw); @@ -3077,34 +3076,36 @@ void igb_set_fw_version(struct igb_adapter *adapter) case e1000_i210: case e1000_i211: if (!(igb_get_flash_presence_i210(hw))) { - lbuf = kasprintf(GFP_KERNEL, "%2d.%2d-%d", - fw.invm_major, fw.invm_minor, - fw.invm_img_type); + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, + fw.invm_img_type); break; } fallthrough; default: /* if option rom is valid, display its version too */ if (fw.or_valid) { - lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x, %d.%d.%d", - fw.eep_major, fw.eep_minor, - fw.etrack_id, fw.or_major, fw.or_build, - fw.or_patch); + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); /* no option rom */ } else if (fw.etrack_id != 0X0000) { - lbuf = kasprintf(GFP_KERNEL, "%d.%d, 0x%08x", - fw.eep_major, fw.eep_minor, - fw.etrack_id); + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, fw.etrack_id); } else { - lbuf = kasprintf(GFP_KERNEL, "%d.%d.%d", fw.eep_major, - fw.eep_minor, fw.eep_build); + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d.%d", + fw.eep_major, fw.eep_minor, fw.eep_build); } break; } - - /* the truncate happens here if it doesn't fit */ - strscpy(adapter->fw_version, lbuf, sizeof(adapter->fw_version)); - kfree(lbuf); } /** -- GitLab From 55ea989977f4e11a1c3bdfabb51295090bb0f7d6 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 24 Jan 2024 07:57:00 +0200 Subject: [PATCH 1076/1405] igc: Remove temporary workaround PHY_CONTROL register works as defined in the IEEE 802.3 specification (IEEE 802.3-2008 22.2.4.1). Tidy up the temporary workaround. User impact: PHY can now be powered down when the ethernet link is down. Testing hints: ip link set down (or just disconnect the ethernet cable). Oldest tested NVM version is: 1045:740. Fixes: 5586838fe9ce ("igc: Add code for PHY support") Signed-off-by: Sasha Neftin Reviewed-by: Paul Menzel Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_phy.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 7cd8716d2ffa3..861f370768616 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -130,11 +130,7 @@ void igc_power_down_phy_copper(struct igc_hw *hw) /* The PHY will retain its settings across a power down/up cycle */ hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); mii_reg |= MII_CR_POWER_DOWN; - - /* Temporary workaround - should be removed when PHY will implement - * IEEE registers as properly - */ - /* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/ + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); usleep_range(1000, 2000); } -- GitLab From 321da3dc1f3c92a12e3c5da934090d2992a8814c Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 13 Feb 2024 09:33:06 -0500 Subject: [PATCH 1077/1405] scsi: sd: usb_storage: uas: Access media prior to querying device properties It has been observed that some USB/UAS devices return generic properties hardcoded in firmware for mode pages for a period of time after a device has been discovered. The reported properties are either garbage or they do not accurately reflect the characteristics of the physical storage device attached in the case of a bridge. Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") we would call revalidate several times during device discovery. As a result, incorrect values would eventually get replaced with ones accurately describing the attached storage. When we did away with the redundant revalidate pass, several cases were reported where devices reported nonsensical values or would end up in write-protected state. An initial attempt at addressing this issue involved introducing a delayed second revalidate invocation. However, this approach still left some devices reporting incorrect characteristics. Tasos Sahanidis debugged the problem further and identified that introducing a READ operation prior to MODE SENSE fixed the problem and that it wasn't a timing issue. Issuing a READ appears to cause the devices to update their state to reflect the actual properties of the storage media. Device properties like vendor, model, and storage capacity appear to be correctly reported from the get-go. It is unclear why these devices defer populating the remaining characteristics. Match the behavior of a well known commercial operating system and trigger a READ operation prior to querying device characteristics to force the device to populate the mode pages. The additional READ is triggered by a flag set in the USB storage and UAS drivers. We avoid issuing the READ for other transport classes since some storage devices identify Linux through our particular discovery command sequence. Link: https://lore.kernel.org/r/20240213143306.2194237-1-martin.petersen@oracle.com Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice") Cc: stable@vger.kernel.org Reported-by: Tasos Sahanidis Reviewed-by: Ewan D. Milne Reviewed-by: Bart Van Assche Tested-by: Tasos Sahanidis Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 26 +++++++++++++++++++++++++- drivers/usb/storage/scsiglue.c | 7 +++++++ drivers/usb/storage/uas.c | 7 +++++++ include/scsi/scsi_device.h | 1 + 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0833b3e6aa6e8..bdd0acf7fa3cb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3407,6 +3407,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, return true; } +static void sd_read_block_zero(struct scsi_disk *sdkp) +{ + unsigned int buf_len = sdkp->device->sector_size; + char *buffer, cmd[10] = { }; + + buffer = kmalloc(buf_len, GFP_KERNEL); + if (!buffer) + return; + + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + + scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, + SD_TIMEOUT, sdkp->max_retries, NULL); + kfree(buffer); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -3446,7 +3464,13 @@ static int sd_revalidate_disk(struct gendisk *disk) */ if (sdkp->media_present) { sd_read_capacity(sdkp, buffer); - + /* + * Some USB/UAS devices return generic values for mode pages + * until the media has been accessed. Trigger a READ operation + * to force the device to populate mode pages. + */ + if (sdp->read_before_ms) + sd_read_block_zero(sdkp); /* * set the default to rotational. All non-rotational devices * support the block characteristics VPD page, which will diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index c54e9805da536..12cf9940e5b67 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev) */ sdev->use_192_bytes_for_3f = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 9707f53cfda9c..71ace274761f1 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) sdev->guess_capacity = 1; + /* + * Some devices report generic values until the media has been + * accessed. Force a READ(10) prior to querying device + * characteristics. + */ + sdev->read_before_ms = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 5ec1e71a09de7..01c02cb76ea6b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -208,6 +208,7 @@ struct scsi_device { unsigned use_10_for_rw:1; /* first try 10-byte read / write */ unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */ unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */ + unsigned read_before_ms:1; /* perform a READ before MODE SENSE */ unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */ unsigned no_write_same:1; /* no WRITE SAME command */ unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */ -- GitLab From d8bdd795d383a23e38ac48a40d3d223caf47b290 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 14 Feb 2024 17:05:38 +0100 Subject: [PATCH 1078/1405] lsm: fix integer overflow in lsm_set_self_attr() syscall security_setselfattr() has an integer overflow bug that leads to out-of-bounds access when userspace provides bogus input: `lctx->ctx_len + sizeof(*lctx)` is checked against `lctx->len` (and, redundantly, also against `size`), but there are no checks on `lctx->ctx_len`. Therefore, userspace can provide an `lsm_ctx` with `->ctx_len` set to a value between `-sizeof(struct lsm_ctx)` and -1, and this bogus `->ctx_len` will then be passed to an LSM module as a buffer length, causing LSM modules to perform out-of-bounds accesses. The following reproducer will demonstrate this under ASAN (if AppArmor is loaded as an LSM): ``` struct lsm_ctx { uint64_t id; uint64_t flags; uint64_t len; uint64_t ctx_len; char ctx[]; }; int main(void) { size_t size = sizeof(struct lsm_ctx); struct lsm_ctx *ctx = malloc(size); ctx->id = 104/*LSM_ID_APPARMOR*/; ctx->flags = 0; ctx->len = size; ctx->ctx_len = -sizeof(struct lsm_ctx); syscall( 460/*__NR_lsm_set_self_attr*/, /*attr=*/ 100/*LSM_ATTR_CURRENT*/, /*ctx=*/ ctx, /*size=*/ size, /*flags=*/ 0 ); } ``` Fixes: a04a1198088a ("LSM: syscalls for current process attributes") Signed-off-by: Jann Horn Acked-by: Casey Schaufler [PM: subj tweak, removed ref to ASAN splat that isn't included] Signed-off-by: Paul Moore --- security/security.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/security.c b/security/security.c index 3aaad75c9ce85..7035ee35a3930 100644 --- a/security/security.c +++ b/security/security.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* How many LSMs were built into the kernel? */ @@ -4015,6 +4016,7 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, struct security_hook_list *hp; struct lsm_ctx *lctx; int rc = LSM_RET_DEFAULT(setselfattr); + u64 required_len; if (flags) return -EINVAL; @@ -4027,8 +4029,9 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, if (IS_ERR(lctx)) return PTR_ERR(lctx); - if (size < lctx->len || size < lctx->ctx_len + sizeof(*lctx) || - lctx->len < lctx->ctx_len + sizeof(*lctx)) { + if (size < lctx->len || + check_add_overflow(sizeof(*lctx), lctx->ctx_len, &required_len) || + lctx->len < required_len) { rc = -EINVAL; goto free_out; } -- GitLab From 3b9ab248bc45abf8c2365ed3eec86cdefd4d626a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 2 Feb 2024 10:31:42 +0900 Subject: [PATCH 1079/1405] kbuild: use 4-space indentation when followed by conditionals GNU Make manual [1] clearly forbids a tab at the beginning of the conditional directive line: "Extra spaces are allowed and ignored at the beginning of the conditional directive line, but a tab is not allowed." This will not work for the next release of GNU Make, hence commit 82175d1f9430 ("kbuild: Replace tabs with spaces when followed by conditionals") replaced the inappropriate tabs with 8 spaces. However, the 8-space indentation cannot be visually distinguished. Linus suggested 2-4 spaces for those nested if-statements. [2] This commit redoes the replacement with 4 spaces. [1]: https://www.gnu.org/software/make/manual/make.html#Conditional-Syntax [2]: https://lore.kernel.org/all/CAHk-=whJKZNZWsa-VNDKafS_VfY4a5dAjG-r8BZgWk_a-xSepw@mail.gmail.com/ Suggested-by: Linus Torvalds Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ arch/m68k/Makefile | 4 ++-- arch/parisc/Makefile | 4 ++-- arch/x86/Makefile | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 7e0b2ad98905b..ed80c7d98a7ed 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 76ef1a67c3611..0abcf994ce550 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 7486b3b305949..316f84f1d15c8 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2264db14a25d3..da8f3caf27815 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif endif + endif else BITS := 64 UTS_MACHINE := x86_64 -- GitLab From f44bff19268517ee98e80e944cad0f04f1db72e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:19:04 +0100 Subject: [PATCH 1080/1405] i2c: pasemi: split driver into two separate modules On powerpc, it is possible to compile test both the new apple (arm) and old pasemi (powerpc) drivers for the i2c hardware at the same time, which leads to a warning about linking the same object file twice: scripts/Makefile.build:244: drivers/i2c/busses/Makefile: i2c-pasemi-core.o is added to multiple modules: i2c-apple i2c-pasemi Rework the driver to have an explicit helper module, letting Kbuild take care of whether this should be built-in or a loadable driver. Fixes: 9bc5f4f660ff ("i2c: pasemi: Split pci driver to its own file") Signed-off-by: Arnd Bergmann Reviewed-by: Sven Peter Signed-off-by: Andi Shyti --- drivers/i2c/busses/Makefile | 6 ++---- drivers/i2c/busses/i2c-pasemi-core.c | 6 ++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 3757b9391e60a..aa0ee8ecd6f2f 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -90,10 +90,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 7d54a9f34c74b..bd8becbdeeb28 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -369,6 +369,7 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); irqreturn_t pasemi_irq_handler(int irq, void *dev_id) { @@ -378,3 +379,8 @@ irqreturn_t pasemi_irq_handler(int irq, void *dev_id) complete(&smbus->irq_completion); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(pasemi_irq_handler); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson "); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); -- GitLab From c1c9d0f6f7f1dbf29db996bd8e166242843a5f21 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 14 Feb 2024 15:59:39 +0100 Subject: [PATCH 1081/1405] i2c: i801: Fix block process call transactions According to the Intel datasheets, software must reset the block buffer index twice for block process call transactions: once before writing the outgoing data to the buffer, and once again before reading the incoming data from the buffer. The driver is currently missing the second reset, causing the wrong portion of the block buffer to be read. Signed-off-by: Jean Delvare Reported-by: Piotr Zakowski Closes: https://lore.kernel.org/linux-i2c/20240213120553.7b0ab120@endymion.delvare/ Fixes: 315cd67c9453 ("i2c: i801: Add Block Write-Block Read Process Call support") Reviewed-by: Alexander Sverdlin Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-i801.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3932e8d96a171..2c36b36d7d516 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -498,11 +498,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, } data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } -- GitLab From 6388cfd0e69b56ca640610f1bf29334619d18142 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 10 Feb 2024 16:20:04 +0100 Subject: [PATCH 1082/1405] docs: kconfig: Fix grammar and formatting - Remove unnecessary spaces - Fix grammar s/to solution/solution/ Signed-off-by: Thorsten Blum Reviewed-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/Kconfig.recursion-issue-01 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kbuild/Kconfig.recursion-issue-01 b/Documentation/kbuild/Kconfig.recursion-issue-01 index e8877db0461fb..ac49836d8ecf8 100644 --- a/Documentation/kbuild/Kconfig.recursion-issue-01 +++ b/Documentation/kbuild/Kconfig.recursion-issue-01 @@ -16,13 +16,13 @@ # that are possible for CORE. So for example if CORE_BELL_A_ADVANCED is 'y', # CORE must be 'y' too. # -# * What influences CORE_BELL_A_ADVANCED ? +# * What influences CORE_BELL_A_ADVANCED? # # As the name implies CORE_BELL_A_ADVANCED is an advanced feature of # CORE_BELL_A so naturally it depends on CORE_BELL_A. So if CORE_BELL_A is 'y' # we know CORE_BELL_A_ADVANCED can be 'y' too. # -# * What influences CORE_BELL_A ? +# * What influences CORE_BELL_A? # # CORE_BELL_A depends on CORE, so CORE influences CORE_BELL_A. # @@ -34,7 +34,7 @@ # the "recursive dependency detected" error. # # Reading the Documentation/kbuild/Kconfig.recursion-issue-01 file it may be -# obvious that an easy to solution to this problem should just be the removal +# obvious that an easy solution to this problem should just be the removal # of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already # since CORE_BELL_A depends on CORE. Recursive dependency issues are not always # so trivial to resolve, we provide another example below of practical -- GitLab From e3a9ee963ad8ba677ca925149812c5932b49af69 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 12 Feb 2024 19:05:10 -0700 Subject: [PATCH 1083/1405] kbuild: Fix changing ELF file type for output of gen_btf for big endian Commit 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") changed the ELF type of .btf.vmlinux.bin.o to ET_REL via dd, which works fine for little endian platforms: 00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 03 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| +00000010 01 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| However, for big endian platforms, it changes the wrong byte, resulting in an invalid ELF file type, which ld.lld rejects: 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 01 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: : 103 ld.lld: error: .btf.vmlinux.bin.o: unknown file type Fix this by updating the entire 16-bit e_type field rather than just a single byte, so that everything works correctly for all platforms and linkers. 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 00 01 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: REL (Relocatable file) While in the area, update the comment to mention that binutils 2.35+ matches LLD's behavior of rejecting an ET_EXEC input, which occurred after the comment was added. Cc: stable@vger.kernel.org Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Link: https://github.com/llvm/llvm-project/pull/75643 Suggested-by: Masahiro Yamada Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Reviewed-by: Nicolas Schier Reviewed-by: Kees Cook Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada --- scripts/link-vmlinux.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a432b171be826..7862a81017477 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file -- GitLab From dae4a0171e25884787da32823b3081b4c2acebb2 Mon Sep 17 00:00:00 2001 From: Andrew Ballance Date: Tue, 13 Feb 2024 19:23:05 -0600 Subject: [PATCH 1084/1405] gen_compile_commands: fix invalid escape sequence warning With python 3.12, '\#' results in this warning SyntaxWarning: invalid escape sequence '\#' Signed-off-by: Andrew Ballance Reviewed-by: Justin Stitt Signed-off-by: Masahiro Yamada --- scripts/clang-tools/gen_compile_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 5dea4479240bc..e4fb686dfaa9f 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path): # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the # kernel version). The compile_commands.json file is not interepreted # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') + prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') # Return the canonical path, eliminating any symbolic links encountered in the path. abs_path = os.path.realpath(os.path.join(root_directory, file_path)) -- GitLab From 5d9a16b2a4d9e8fa028892ded43f6501bc2969e5 Mon Sep 17 00:00:00 2001 From: Radek Krejci Date: Wed, 14 Feb 2024 10:14:07 +0100 Subject: [PATCH 1085/1405] modpost: trim leading spaces when processing source files list get_line() does not trim the leading spaces, but the parse_source_files() expects to get lines with source files paths where the first space occurs after the file path. Fixes: 70f30cfe5b89 ("modpost: use read_text_file() and get_line() for reading text files") Signed-off-by: Radek Krejci Signed-off-by: Masahiro Yamada --- scripts/mod/sumversion.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 31066bfdba04e..dc4878502276c 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); -- GitLab From f6374a82fc85bf911d033e2fa791372ce3356270 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 8 Feb 2024 15:46:03 +0100 Subject: [PATCH 1086/1405] netfilter: nft_set_pipapo: fix missing : in kdoc Add missing : in kdoc field names. Fixes: 8683f4b9950d ("nft_set_pipapo: Prepare for vectorised implementation: helpers") Reported-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index f59a0cd811051..3842c7341a9f4 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -144,10 +144,10 @@ struct nft_pipapo_scratch { /** * struct nft_pipapo_match - Data used for lookup and matching - * @field_count Amount of fields in set + * @field_count: Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results * @bsize_max: Maximum lookup table bucket size of all fields, in longs - * @rcu Matching data is swapped on commits + * @rcu: Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { -- GitLab From 0f1ae2821fa4b13ab0f5ad7ff89fa57efcb04fe0 Mon Sep 17 00:00:00 2001 From: Kyle Swenson Date: Thu, 8 Feb 2024 23:56:31 +0000 Subject: [PATCH 1087/1405] netfilter: nat: restore default DNAT behavior When a DNAT rule is configured via iptables with different port ranges, iptables -t nat -A PREROUTING -p tcp -d 10.0.0.2 -m tcp --dport 32000:32010 -j DNAT --to-destination 192.168.0.10:21000-21010 we seem to be DNATing to some random port on the LAN side. While this is expected if --random is passed to the iptables command, it is not expected without passing --random. The expected behavior (and the observed behavior prior to the commit in the "Fixes" tag) is the traffic will be DNAT'd to 192.168.0.10:21000 unless there is a tuple collision with that destination. In that case, we expect the traffic to be instead DNAT'd to 192.168.0.10:21001, so on so forth until the end of the range. This patch intends to restore the behavior observed prior to the "Fixes" tag. Fixes: 6ed5943f8735 ("netfilter: nat: remove l4 protocol port rovers") Signed-off-by: Kyle Swenson Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c3d7ecbc777ce..016c816d91cbc 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -551,8 +551,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, find_free_id: if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) off = (ntohs(*keyptr) - ntohs(range->base_proto.all)); - else + else if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL) || + maniptype != NF_NAT_MANIP_DST) off = get_random_u16(); + else + off = 0; attempts = range_size; if (attempts > NF_NAT_MAX_ATTEMPTS) -- GitLab From 84443741faab9045d53f022a9ac6a6633067a481 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 14 Feb 2024 15:42:35 +0100 Subject: [PATCH 1088/1405] netfilter: nf_tables: fix bidirectional offload regression Commit 8f84780b84d6 ("netfilter: flowtable: allow unidirectional rules") made unidirectional flow offload possible, while completely ignoring (and breaking) bidirectional flow offload for nftables. Add the missing flag that was left out as an exercise for the reader :) Cc: Vlad Buslov Fixes: 8f84780b84d6 ("netfilter: flowtable: allow unidirectional rules") Reported-by: Daniel Golle Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 397351fa4d5f8..ab95760987010 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; } + __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags); ret = flow_offload_add(flowtable, flow); if (ret < 0) goto err_flow_add; -- GitLab From a37ee9e117ef73bbc2f5c0b31911afd52d229861 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Feb 2024 08:23:05 -0700 Subject: [PATCH 1089/1405] io_uring/net: fix multishot accept overflow handling If we hit CQ ring overflow when attempting to post a multishot accept completion, we don't properly save the result or return code. This results in losing the accepted fd value. Instead, we return the result from the poll operation that triggered the accept retry. This is generally POLLIN|POLLPRI|POLLRDNORM|POLLRDBAND which is 0xc3, or 195, which looks like a valid file descriptor, but it really has no connection to that. Handle this like we do for other multishot completions - assign the result, and return IOU_STOP_MULTISHOT to cancel any further completions from this request when overflow is hit. This preserves the result, as we should, and tells the application that the request needs to be re-armed. Cc: stable@vger.kernel.org Fixes: 515e26961295 ("io_uring: revert "io_uring fix multishot accept ordering"") Link: https://github.com/axboe/liburing/issues/1062 Signed-off-by: Jens Axboe --- io_uring/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 43bc9a5f96f9d..161622029147c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1372,7 +1372,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags) * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1397,7 +1397,8 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags) ret, IORING_CQE_F_MORE)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- GitLab From dadd1701ae11a204dd4bea8086905a9576c4b63c Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 13 Feb 2024 10:39:55 -0800 Subject: [PATCH 1090/1405] ice: Add check for lport extraction to LAG init To fully support initializing the LAG support code, a DDP package that extracts the logical port from the metadata is required. If such a package is not present, there could be difficulties in supporting some bond types. Add a check into the initialization flow that will bypass the new paths if any of the support pieces are missing. Reviewed-by: Przemek Kitszel Fixes: df006dd4b1dc ("ice: Add initial support framework for LAG") Signed-off-by: Dave Ertman Reviewed-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Sujai Buvaneswaran Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20240213183957.1483857-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lag.c | 25 ++++++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_lag.h | 3 +++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 2a25323105e5b..467372d541d21 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -151,6 +151,27 @@ ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport) return NULL; } +/** + * ice_pkg_has_lport_extract - check if lport extraction supported + * @hw: HW struct + */ +static bool ice_pkg_has_lport_extract(struct ice_hw *hw) +{ + int i; + + for (i = 0; i < hw->blk[ICE_BLK_SW].es.count; i++) { + u16 offset; + u8 fv_prot; + + ice_find_prot_off(hw, ICE_BLK_SW, ICE_SW_DEFAULT_PROFILE, i, + &fv_prot, &offset); + if (fv_prot == ICE_FV_PROT_MDID && + offset == ICE_LP_EXT_BUF_OFFSET) + return true; + } + return false; +} + /** * ice_lag_find_primary - returns pointer to primary interfaces lag struct * @lag: local interfaces lag struct @@ -1206,7 +1227,7 @@ static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) } /** - * ice_lag_init_feature_support_flag - Check for NVM support for LAG + * ice_lag_init_feature_support_flag - Check for package and NVM support for LAG * @pf: PF struct */ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) @@ -1219,7 +1240,7 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) else ice_clear_feature_support(pf, ICE_F_ROCE_LAG); - if (caps->sriov_lag) + if (caps->sriov_lag && ice_pkg_has_lport_extract(&pf->hw)) ice_set_feature_support(pf, ICE_F_SRIOV_LAG); else ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index ede833dfa6586..183b38792ef22 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -17,6 +17,9 @@ enum ice_lag_role { #define ICE_LAG_INVALID_PORT 0xFF #define ICE_LAG_RESET_RETRIES 5 +#define ICE_SW_DEFAULT_PROFILE 0 +#define ICE_FV_PROT_MDID 255 +#define ICE_LP_EXT_BUF_OFFSET 32 struct ice_pf; struct ice_vf; -- GitLab From 2ec197fda25f57afccac7f2846e509471488614c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 Feb 2024 06:20:55 -0800 Subject: [PATCH 1091/1405] selftests: tls: increase the wait in poll_partial_rec_async Test runners on debug kernels occasionally fail with: # # RUN tls_err.13_aes_gcm.poll_partial_rec_async ... # # tls.c:1883:poll_partial_rec_async:Expected poll(&pfd, 1, 5) (0) == 1 (1) # # tls.c:1870:poll_partial_rec_async:Expected status (256) == 0 (0) # # poll_partial_rec_async: Test failed at step #17 # # FAIL tls_err.13_aes_gcm.poll_partial_rec_async # not ok 699 tls_err.13_aes_gcm.poll_partial_rec_async # # FAILED: 698 / 699 tests passed. This points to the second poll() in the test which is expected to wait for the sender to send the rest of the data. Apparently under some conditions that doesn't happen within 5ms, bump the timeout to 20ms. Fixes: 23fcb62bc19c ("selftests: tls: add tests for poll behavior") Link: https://lore.kernel.org/r/20240213142055.395564-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index bc36c91c4480f..49c84602707f8 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1874,13 +1874,13 @@ TEST_F(tls_err, poll_partial_rec_async) /* Child should sleep in poll(), never get a wake */ pfd.fd = self->cfd2; pfd.events = POLLIN; - EXPECT_EQ(poll(&pfd, 1, 5), 0); + EXPECT_EQ(poll(&pfd, 1, 20), 0); EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */ pfd.fd = self->cfd2; pfd.events = POLLIN; - EXPECT_EQ(poll(&pfd, 1, 5), 1); + EXPECT_EQ(poll(&pfd, 1, 20), 1); exit(!_metadata->passed); } -- GitLab From 9377de4cb3e8fb6c494fa2f5ae2c3780d3e73822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 8 Feb 2024 14:21:15 +0100 Subject: [PATCH 1092/1405] drm/xe/vm: Avoid reserving zero fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function xe_vm_prepare_vma was blindly accepting zero as the number of fences and forwarded that to drm_exec_prepare_obj. However, that leads to an out-of-bounds shift in the dma_resv_reserve_fences() and while one could argue that the dma_resv code should be robust against that, avoid attempting to reserve zero fences. Relevant stack trace: [773.183188] ------------[ cut here ]------------ [773.183199] UBSAN: shift-out-of-bounds in ../include/linux/log2.h:57:13 [773.183241] shift exponent 64 is too large for 64-bit type 'long unsigned int' [773.183254] CPU: 2 PID: 1816 Comm: xe_evict Tainted: G U 6.8.0-rc3-xe #1 [773.183256] Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 2014 10/14/2022 [773.183257] Call Trace: [773.183258] [773.183260] dump_stack_lvl+0xaf/0xd0 [773.183266] dump_stack+0x10/0x20 [773.183283] ubsan_epilogue+0x9/0x40 [773.183286] __ubsan_handle_shift_out_of_bounds+0x10f/0x170 [773.183293] dma_resv_reserve_fences.cold+0x2b/0x48 [773.183295] ? ww_mutex_lock+0x3c/0x110 [773.183301] drm_exec_prepare_obj+0x45/0x60 [drm_exec] [773.183313] xe_vm_prepare_vma+0x33/0x70 [xe] [773.183375] xe_vma_destroy_unlocked+0x55/0xa0 [xe] [773.183427] xe_vm_close_and_put+0x526/0x940 [xe] Fixes: 2714d5093620 ("drm/xe: Convert pagefaulting code to use drm_exec") Cc: Thomas Hellström Cc: Matthew Brost Cc: Rodrigo Vivi Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240208132115.3132-1-thomas.hellstrom@linux.intel.com (cherry picked from commit eb538b5574251a449f40b1ee35efc631228c8992) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 865e10d0a06aa..7b00faa672879 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -995,9 +995,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, int err; XE_WARN_ON(!vm); - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - if (!err && bo && !bo->vm) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + if (num_shared) + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); + else + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) { + if (num_shared) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + else + err = drm_exec_lock_obj(exec, &bo->ttm.base); + } return err; } -- GitLab From c2626b7387210cff741be9fb91d317f02a70347c Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 5 Feb 2024 15:31:11 +0000 Subject: [PATCH 1093/1405] drm/xe/display: fix i915_gem_object_is_shmem() wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shmem ensures the memory is cleared on allocation, however here we are using TTM, which doesn't natively support shmem (other than for swap), but instead just allocates normal system memory. And we only zero such memory for userspace allocations. In the case of intel_fbdev we are missing the memset_io() since display path incorrectly thinks object is shmem based. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Matthew Auld Reviewed-by: Suraj Kandpal Link: https://patchwork.freedesktop.org/patch/msgid/20240205153110.38340-2-matthew.auld@intel.com (cherry picked from commit 63fb531fbfda81bda652546a39333b565aea324d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 68d9f6116bdfc..777c20ceabab1 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -10,7 +10,7 @@ #include "xe_bo.h" -#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT) +#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */ static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n) { -- GitLab From 8cb92dc730d8ae5f803dae1a6eb91fb9603f4237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 9 Feb 2024 12:26:55 +0100 Subject: [PATCH 1094/1405] drm/xe/pt: Allow for stricter type- and range checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distinguish between xe_pt and the xe_pt_dir subclass when allocating and freeing. Also use a fixed-size array for the xe_pt_dir page entries to make life easier for dynamic range- checkers. Finally rename the page-directory child pointer array to "children". While no functional change, this fixes ubsan splats similar to: [ 51.463021] ------------[ cut here ]------------ [ 51.463022] UBSAN: array-index-out-of-bounds in drivers/gpu/drm/xe/xe_pt.c:47:9 [ 51.463023] index 0 is out of range for type 'xe_ptw *[*]' [ 51.463024] CPU: 5 PID: 2778 Comm: xe_vm Tainted: G U 6.8.0-rc1+ #218 [ 51.463026] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 2001 02/01/2023 [ 51.463027] Call Trace: [ 51.463028] [ 51.463029] dump_stack_lvl+0x47/0x60 [ 51.463030] __ubsan_handle_out_of_bounds+0x95/0xd0 [ 51.463032] xe_pt_destroy+0xa5/0x150 [xe] [ 51.463088] __xe_pt_unbind_vma+0x36c/0x9b0 [xe] [ 51.463144] xe_vm_unbind+0xd8/0x580 [xe] [ 51.463204] ? drm_exec_prepare_obj+0x3f/0x60 [drm_exec] [ 51.463208] __xe_vma_op_execute+0x5da/0x910 [xe] [ 51.463268] ? __drm_gpuvm_sm_unmap+0x1cb/0x220 [drm_gpuvm] [ 51.463272] ? radix_tree_node_alloc.constprop.0+0x89/0xc0 [ 51.463275] ? drm_gpuva_it_remove+0x1f3/0x2a0 [drm_gpuvm] [ 51.463279] ? drm_gpuva_remove+0x2f/0xc0 [drm_gpuvm] [ 51.463283] xe_vm_bind_ioctl+0x1a55/0x20b0 [xe] [ 51.463344] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 51.463414] drm_ioctl_kernel+0xb6/0x120 [ 51.463416] drm_ioctl+0x287/0x4e0 [ 51.463418] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 51.463481] __x64_sys_ioctl+0x94/0xd0 [ 51.463484] do_syscall_64+0x86/0x170 [ 51.463486] ? syscall_exit_to_user_mode+0x7d/0x200 [ 51.463488] ? do_syscall_64+0x96/0x170 [ 51.463490] ? do_syscall_64+0x96/0x170 [ 51.463492] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 51.463494] RIP: 0033:0x7f246bfe817d [ 51.463498] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00 [ 51.463501] RSP: 002b:00007ffc1bd19ad0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 51.463502] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f246bfe817d [ 51.463504] RDX: 00007ffc1bd19b60 RSI: 0000000040886445 RDI: 0000000000000003 [ 51.463505] RBP: 00007ffc1bd19b20 R08: 0000000000000000 R09: 0000000000000000 [ 51.463506] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc1bd19b60 [ 51.463508] R13: 0000000040886445 R14: 0000000000000003 R15: 0000000000010000 [ 51.463510] [ 51.463517] ---[ end trace ]--- v2 - Fix kerneldoc warning (Matthew Brost) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240209112655.4872-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 157261c58b283f5c83e3f9087eca63be8d591ab8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pt.c | 39 +++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_pt_walk.c | 2 +- drivers/gpu/drm/xe/xe_pt_walk.h | 19 +++------------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index e45b37c3f0c26..ac19bfa3f798c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -20,8 +20,8 @@ struct xe_pt_dir { struct xe_pt pt; - /** @dir: Directory structure for the xe_pt_walk functionality */ - struct xe_ptw_dir dir; + /** @children: Array of page-table child nodes */ + struct xe_ptw *children[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -44,7 +44,7 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->dir.entries[index], struct xe_pt, base); + return container_of(pt_dir->children[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -65,6 +65,14 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, XE_PTE_NULL; } +static void xe_pt_free(struct xe_pt *pt) +{ + if (pt->level) + kfree(as_xe_pt_dir(pt)); + else + kfree(pt); +} + /** * xe_pt_create() - Create a page-table. * @vm: The vm to create for. @@ -85,15 +93,19 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, { struct xe_pt *pt; struct xe_bo *bo; - size_t size; int err; - size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + - XE_PDES * sizeof(struct xe_ptw *); - pt = kzalloc(size, GFP_KERNEL); + if (level) { + struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); + + pt = (dir) ? &dir->pt : NULL; + } else { + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + } if (!pt) return ERR_PTR(-ENOMEM); + pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | @@ -106,8 +118,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, goto err_kfree; } pt->bo = bo; - pt->level = level; - pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; + pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -116,7 +127,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, return pt; err_kfree: - kfree(pt); + xe_pt_free(pt); return ERR_PTR(err); } @@ -193,7 +204,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) deferred); } } - kfree(pt); + xe_pt_free(pt); } /** @@ -358,7 +369,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, struct iosys_map *map = &parent->bo->vmap; if (unlikely(xe_child)) - parent->base.dir->entries[offset] = &xe_child->base; + parent->base.children[offset] = &xe_child->base; xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -853,7 +864,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, j_), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[j_] = &newpte->base; + pt_dir->children[j_] = &newpte->base; } kfree(entries[i].pt_entries); } @@ -1507,7 +1518,7 @@ xe_pt_commit_unbind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, i), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[i] = NULL; + pt_dir->children[i] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index 8f6c8d063f39f..b8b3d2aea4923 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,7 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + struct xe_ptw **entries = parent->children ? parent->children : NULL; const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index ec3d1e9efa6d5..5ecc4d2f0f653 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -8,28 +8,15 @@ #include #include -struct xe_ptw_dir; - /** * struct xe_ptw - base class for driver pagetable subclassing. - * @dir: Pointer to an array of children if any. + * @children: Pointer to an array of children if any. * * Drivers could subclass this, and if it's a page-directory, typically - * embed the xe_ptw_dir::entries array in the same allocation. + * embed an array of xe_ptw pointers. */ struct xe_ptw { - struct xe_ptw_dir *dir; -}; - -/** - * struct xe_ptw_dir - page directory structure - * @entries: Array holding page directory children. - * - * It is the responsibility of the user to ensure @entries is - * correctly sized. - */ -struct xe_ptw_dir { - struct xe_ptw *entries[0]; + struct xe_ptw **children; }; /** -- GitLab From 455dae7549aed709707feda5d6b3e085b37d33f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:56:48 +0100 Subject: [PATCH 1095/1405] drm/xe: avoid function cast warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang-16 warns about a cast between incompatible function types: drivers/gpu/drm/xe/xe_range_fence.c:155:10: error: cast from 'void (*)(const void *)' to 'void (*)(struct xe_range_fence *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 155 | .free = (void (*)(struct xe_range_fence *rfence)) kfree, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid this with a trivial helper function that calls kfree() here. v2: - s/* rfence/*rfence/ (Thomas) Fixes: 845f64bdbfc9 ("drm/xe: Introduce a range-fence utility") Signed-off-by: Arnd Bergmann Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240213095719.454865-1-arnd@kernel.org (cherry picked from commit f2c9364db57992b1496db4ae5e67ab14926be3ec) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_range_fence.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c index d35d9ec58e86f..372378e89e989 100644 --- a/drivers/gpu/drm/xe/xe_range_fence.c +++ b/drivers/gpu/drm/xe/xe_range_fence.c @@ -151,6 +151,11 @@ xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last) return xe_range_fence_tree_iter_next(rfence, start, last); } +static void xe_range_fence_free(struct xe_range_fence *rfence) +{ + kfree(rfence); +} + const struct xe_range_fence_ops xe_range_fence_kfree_ops = { - .free = (void (*)(struct xe_range_fence *rfence)) kfree, + .free = xe_range_fence_free, }; -- GitLab From 488b6d91b07112eaaaa4454332c1480894d4e06e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 13 Feb 2024 03:04:28 -0800 Subject: [PATCH 1096/1405] net-timestamp: make sk_tskey more predictable in error path When SOF_TIMESTAMPING_OPT_ID is used to ambiguate timestamped datagrams, the sk_tskey can become unpredictable in case of any error happened during sendmsg(). Move increment later in the code and make decrement of sk_tskey in error path. This solution is still racy in case of multiple threads doing snedmsg() over the very same socket in parallel, but still makes error path much more predictable. Fixes: 09c2d251b707 ("net-timestamp: add key to disambiguate concurrent datagrams") Reported-by: Andy Lutomirski Signed-off-by: Vadim Fedorenko Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240213110428.1681540-1-vadfed@meta.com Signed-off-by: Paolo Abeni --- net/ipv4/ip_output.c | 13 ++++++++----- net/ipv6/ip6_output.c | 13 ++++++++----- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 41537d18eecfd..67d846622365e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -972,8 +972,8 @@ static int __ip_append_data(struct sock *sk, unsigned int maxfraglen, fragheaderlen, maxnonfragsize; int csummode = CHECKSUM_NONE; struct rtable *rt = (struct rtable *)cork->dst; + bool paged, hold_tskey, extra_uref = false; unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref = false; u32 tskey = 0; skb = skb_peek_tail(queue); @@ -982,10 +982,6 @@ static int __ip_append_data(struct sock *sk, mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize; paged = !!cork->gso_size; - if (cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; - hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); @@ -1052,6 +1048,11 @@ static int __ip_append_data(struct sock *sk, cork->length += length; + hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; + if (hold_tskey) + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + /* So, what's going on in the loop below? * * We use calculated fragment length to generate chained skb, @@ -1274,6 +1275,8 @@ static int __ip_append_data(struct sock *sk, cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); + if (hold_tskey) + atomic_dec(&sk->sk_tskey); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a722a43dd6685..31b86fe661aa6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1424,11 +1424,11 @@ static int __ip6_append_data(struct sock *sk, bool zc = false; u32 tskey = 0; struct rt6_info *rt = (struct rt6_info *)cork->dst; + bool paged, hold_tskey, extra_uref = false; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; unsigned int maxnonfragsize, headersize; unsigned int wmem_alloc_delta = 0; - bool paged, extra_uref = false; skb = skb_peek_tail(queue); if (!skb) { @@ -1440,10 +1440,6 @@ static int __ip6_append_data(struct sock *sk, mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize; orig_mtu = mtu; - if (cork->tx_flags & SKBTX_ANY_TSTAMP && - READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) - tskey = atomic_inc_return(&sk->sk_tskey) - 1; - hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + @@ -1538,6 +1534,11 @@ static int __ip6_append_data(struct sock *sk, flags &= ~MSG_SPLICE_PAGES; } + hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP && + READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID; + if (hold_tskey) + tskey = atomic_inc_return(&sk->sk_tskey) - 1; + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1794,6 +1795,8 @@ static int __ip6_append_data(struct sock *sk, cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc); + if (hold_tskey) + atomic_dec(&sk->sk_tskey); return err; } -- GitLab From fb091ff394792c018527b3211bbdfae93ea4ac02 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Wed, 14 Feb 2024 17:55:18 +0000 Subject: [PATCH 1097/1405] arm64: Subscribe Microsoft Azure Cobalt 100 to ARM Neoverse N2 errata Add the MIDR value of Microsoft Azure Cobalt 100, which is a Microsoft implemented CPU based on r0p0 of the ARM Neoverse N2 CPU, and therefore suffers from all the same errata. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Easwar Hariharan Reviewed-by: Anshuman Khandual Acked-by: Mark Rutland Acked-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240214175522.2457857-1-eahariha@linux.microsoft.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 7 +++++++ arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 3 +++ 3 files changed, 14 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index e8c2ce1f9df68..45a7f4932fe07 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -243,3 +243,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ASR | ASR8601 | #8601001 | N/A | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7c7493cb571f9..52f076afeb960 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -135,6 +136,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -193,6 +196,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 967c7c7a4e7db..76b8dd37092ad 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -374,6 +374,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -387,6 +388,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -399,6 +401,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), -- GitLab From 2813926261e436d33bc74486b51cce60b76edf78 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 18:24:38 +0000 Subject: [PATCH 1098/1405] arm64/sve: Lower the maximum allocation for the SVE ptrace regset Doug Anderson observed that ChromeOS crashes are being reported which include failing allocations of order 7 during core dumps due to ptrace allocating storage for regsets: chrome: page allocation failure: order:7, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=urgent,mems_allowed=0 ... regset_get_alloc+0x1c/0x28 elf_core_dump+0x3d8/0xd8c do_coredump+0xeb8/0x1378 with further investigation showing that this is: [ 66.957385] DOUG: Allocating 279584 bytes which is the maximum size of the SVE regset. As Doug observes it is not entirely surprising that such a large allocation of contiguous memory might fail on a long running system. The SVE regset is currently sized to hold SVE registers with a VQ of SVE_VQ_MAX which is 512, substantially more than the architectural maximum of 16 which we might see even in a system emulating the limits of the architecture. Since we don't expose the size we tell the regset core externally let's define ARCH_SVE_VQ_MAX with the actual architectural maximum and use that for the regset, we'll still overallocate most of the time but much less so which will be helpful even if the core is fixed to not require contiguous allocations. Specify ARCH_SVE_VQ_MAX in terms of the maximum value that can be written into ZCR_ELx.LEN (where this is set in the hardware). For consistency update the maximum SME vector length to be specified in the same style while we are at it. We could also teach the ptrace core about runtime discoverable regset sizes but that would be a more invasive change and this is being observed in practical systems. Reported-by: Doug Anderson Signed-off-by: Mark Brown Tested-by: Douglas Anderson Link: https://lore.kernel.org/r/20240213-arm64-sve-ptrace-regset-size-v2-1-c7600ca74b9b@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 12 ++++++------ arch/arm64/kernel/ptrace.c | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 50e5f25d3024c..481d94416d696 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -62,13 +62,13 @@ static inline void cpacr_restore(unsigned long cpacr) * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index dc6cf0e37194e..e3bef38fc2e2d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1500,7 +1500,8 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, + SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, -- GitLab From e5b2e810daf9f2d87fe132eb4d2a85fb08a0db98 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 13 Feb 2024 09:33:39 -0800 Subject: [PATCH 1099/1405] net: bcmasp: Handle RX buffer allocation failure The buffer_pg variable needs to hold an order-5 allocation (32 x PAGE_SIZE) which, under memory pressure may fail to be allocated. Deal with that error condition properly to avoid doing a NULL pointer de-reference in the subsequent call to dma_map_page(). In addition, the err_reclaim_tx error label in bcmasp_netif_init() needs to ensure that the TX NAPI object is properly deleted, otherwise unregister_netdev() will spin forever attempting to test and clear the NAPI_STATE_HASHED bit. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Florian Fainelli Reviewed-by: Justin Chen Link: https://lore.kernel.org/r/20240213173339.3438713-1-florian.fainelli@broadcom.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index 53e5428812552..f59557b0cd515 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -684,6 +684,8 @@ static int bcmasp_init_rx(struct bcmasp_intf *intf) intf->rx_buf_order = get_order(RING_BUFFER_SIZE); buffer_pg = alloc_pages(GFP_KERNEL, intf->rx_buf_order); + if (!buffer_pg) + return -ENOMEM; dma = dma_map_page(kdev, buffer_pg, 0, RING_BUFFER_SIZE, DMA_FROM_DEVICE); @@ -1092,6 +1094,7 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) return 0; err_reclaim_tx: + netif_napi_del(&intf->tx_napi); bcmasp_reclaim_free_all_tx(intf); err_phy_disconnect: if (phydev) -- GitLab From a951884d82886d8453d489f84f20ac168d062b38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Feb 2024 09:32:08 +0100 Subject: [PATCH 1100/1405] kallsyms: ignore ARMv4 thunks along with others lld is now able to build ARMv4 and ARMv4T kernels, which means it can generate thunks for those (__ARMv4PILongThunk_*, __ARMv4PILongBXThunk_*) that can interfere with kallsyms table generation since they do not get ignore like the corresponding ARMv5+ ones are: Inconsistent kallsyms data Try "make KALLSYMS_EXTRA_PASS=1" as a workaround Replace the hardcoded list of thunk symbols with a more general regex that covers this one along with future symbols that follow the same pattern. Fixes: 5eb6e280432d ("ARM: 9289/1: Allow pre-ARMv5 builds with ld.lld 16.0.0 and newer") Fixes: efe6e3068067 ("kallsyms: fix nonconverging kallsyms table with lld") Suggested-by: Masahiro Yamada Signed-off-by: Arnd Bergmann Reviewed-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada --- scripts/mksysmap | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/scripts/mksysmap b/scripts/mksysmap index 9ba1c9da0a40f..57ff5656d566f 100755 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -48,17 +48,8 @@ ${NM} -n ${1} | sed >${2} -e " / __kvm_nvhe_\\$/d / __kvm_nvhe_\.L/d -# arm64 lld -/ __AArch64ADRPThunk_/d - -# arm lld -/ __ARMV5PILongThunk_/d -/ __ARMV7PILongThunk_/d -/ __ThumbV7PILongThunk_/d - -# mips lld -/ __LA25Thunk_/d -/ __microLA25Thunk_/d +# lld arm/aarch64/mips thunks +/ __[[:alnum:]]*Thunk_/d # CFI type identifiers / __kcfi_typeid_/d -- GitLab From 4e45170d9acc2d5ae8f545bf3f2f67504a361338 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Wed, 14 Feb 2024 11:22:24 +0300 Subject: [PATCH 1101/1405] net: sctp: fix skb leak in sctp_inq_free() In case of GSO, 'chunk->skb' pointer may point to an entry from fraglist created in 'sctp_packet_gso_append()'. To avoid freeing random fraglist entry (and so undefined behavior and/or memory leak), introduce 'sctp_inq_chunk_free()' helper to ensure that 'chunk->skb' is set to 'chunk->head_skb' (i.e. fraglist head) before calling 'sctp_chunk_free()', and use the aforementioned helper in 'sctp_inq_pop()' as well. Reported-by: syzbot+8bb053b5d63595ab47db@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?id=0d8351bbe54fd04a492c2daab0164138db008042 Fixes: 90017accff61 ("sctp: Add GSO support") Suggested-by: Xin Long Signed-off-by: Dmitry Antipov Acked-by: Xin Long Link: https://lore.kernel.org/r/20240214082224.10168-1-dmantipov@yandex.ru Signed-off-by: Jakub Kicinski --- net/sctp/inqueue.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 7182c5a450fb5..5c16521818058 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue) INIT_WORK(&queue->immediate, NULL); } +/* Properly release the chunk which is being worked on. */ +static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk) +{ + if (chunk->head_skb) + chunk->skb = chunk->head_skb; + sctp_chunk_free(chunk); +} + /* Release the memory associated with an SCTP inqueue. */ void sctp_inq_free(struct sctp_inq *queue) { @@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue) * free it as well. */ if (queue->in_progress) { - sctp_chunk_free(queue->in_progress); + sctp_inq_chunk_free(queue->in_progress); queue->in_progress = NULL; } } @@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) goto new_skb; } - if (chunk->head_skb) - chunk->skb = chunk->head_skb; - sctp_chunk_free(chunk); + sctp_inq_chunk_free(chunk); chunk = queue->in_progress = NULL; } else { /* Nothing to do. Next chunk in the packet, please. */ -- GitLab From dc34ebd5c018b0edf47f39d11083ad8312733034 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Wed, 14 Feb 2024 09:01:50 +0000 Subject: [PATCH 1102/1405] pppoe: Fix memory leak in pppoe_sendmsg() syzbot reports a memory leak in pppoe_sendmsg [1]. The problem is in the pppoe_recvmsg() function that handles errors in the wrong order. For the skb_recv_datagram() function, check the pointer to skb for NULL first, and then check the 'error' variable, because the skb_recv_datagram() function can set 'error' to -EAGAIN in a loop but return a correct pointer to socket buffer after a number of attempts, though 'error' remains set to -EAGAIN. skb_recv_datagram __skb_recv_datagram // Loop. if (err == -EAGAIN) then // go to the next loop iteration __skb_try_recv_datagram // if (skb != NULL) then return 'skb' // else if a signal is received then // return -EAGAIN Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with Syzkaller. Link: https://syzkaller.appspot.com/bug?extid=6bdfd184eac7709e5cc9 [1] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+6bdfd184eac7709e5cc9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=6bdfd184eac7709e5cc9 Signed-off-by: Gavrilov Ilia Reviewed-by: Guillaume Nault Link: https://lore.kernel.org/r/20240214085814.3894917-1-Ilia.Gavrilov@infotecs.ru Signed-off-by: Jakub Kicinski --- drivers/net/ppp/pppoe.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 8e7238e97d0a7..2ea4f4890d23b 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1007,26 +1007,21 @@ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, struct sk_buff *skb; int error = 0; - if (sk->sk_state & PPPOX_BOUND) { - error = -EIO; - goto end; - } + if (sk->sk_state & PPPOX_BOUND) + return -EIO; skb = skb_recv_datagram(sk, flags, &error); - if (error < 0) - goto end; + if (!skb) + return error; - if (skb) { - total_len = min_t(size_t, total_len, skb->len); - error = skb_copy_datagram_msg(skb, 0, m, total_len); - if (error == 0) { - consume_skb(skb); - return total_len; - } + total_len = min_t(size_t, total_len, skb->len); + error = skb_copy_datagram_msg(skb, 0, m, total_len); + if (error == 0) { + consume_skb(skb); + return total_len; } kfree_skb(skb); -end: return error; } -- GitLab From ed4adc07207d9165a4b3b36199231a22e9f51a55 Mon Sep 17 00:00:00 2001 From: Paul Barker Date: Wed, 14 Feb 2024 15:12:04 +0000 Subject: [PATCH 1103/1405] net: ravb: Count packets instead of descriptors in GbEth RX path The units of "work done" in the RX path should be packets instead of descriptors, as large packets can be spread over multiple descriptors. Fixes: 1c59eb678cbd ("ravb: Fillup ravb_rx_gbeth() stub") Signed-off-by: Paul Barker Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20240214151204.2976-1-paul.barker.ct@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 0e3731f50fc28..f7566cfa45ca3 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -772,29 +772,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) struct ravb_rx_desc *desc; struct sk_buff *skb; dma_addr_t dma_addr; + int rx_packets = 0; u8 desc_status; - int boguscnt; u16 pkt_len; u8 die_dt; int entry; int limit; + int i; entry = priv->cur_rx[q] % priv->num_rx_ring[q]; - boguscnt = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; + limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q]; stats = &priv->stats[q]; - boguscnt = min(boguscnt, *quota); - limit = boguscnt; desc = &priv->gbeth_rx_ring[entry]; - while (desc->die_dt != DT_FEMPTY) { + for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) { /* Descriptor type must be checked before all other reads */ dma_rmb(); desc_status = desc->msc; pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS; - if (--boguscnt < 0) - break; - /* We use 0-byte descriptors to mark the DMA mapping errors */ if (!pkt_len) continue; @@ -820,7 +816,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); napi_gro_receive(&priv->napi[q], skb); - stats->rx_packets++; + rx_packets++; stats->rx_bytes += pkt_len; break; case DT_FSTART: @@ -848,7 +844,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) eth_type_trans(priv->rx_1st_skb, ndev); napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); - stats->rx_packets++; + rx_packets++; stats->rx_bytes += pkt_len; break; } @@ -887,9 +883,9 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) desc->die_dt = DT_FEMPTY; } - *quota -= limit - (++boguscnt); - - return boguscnt <= 0; + stats->rx_packets += rx_packets; + *quota -= rx_packets; + return *quota == 0; } /* Packet receive function for Ethernet AVB */ -- GitLab From 32f03f4002c5df837fb920eb23fcd2f4af9b0b23 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Thu, 15 Feb 2024 15:49:22 +0000 Subject: [PATCH 1104/1405] ALSA: hda/realtek: fix mute/micmute LED For HP mt645 The HP mt645 G7 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240215154922.778394-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5d0517465f085..0ec1312bffd5b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9930,6 +9930,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9937,6 +9938,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), -- GitLab From 41c25e193b2befc22462aa41591d397fab174ca1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Feb 2024 16:31:44 +0100 Subject: [PATCH 1105/1405] ALSA: usb-audio: More relaxed check of MIDI jack names The USB audio driver tries to retrieve MIDI jack name strings that can be used for rawmidi substream names and sequencer port names, but its checking is too strict: often the firmware provides the jack info for unexpected directions, and then we miss the info although it's present. In this patch, the code to extract the jack info is changed to allow both in and out directions in a single loop. That is, the former two functions to obtain the descriptor pointers for jack in and out are changed to a single function that returns iJack of the corresponding jack ID, no matter which direction is used. It's a code simplification at the same time as well as the fix. Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names") Link: https://lore.kernel.org/r/20240215153144.26047-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 73 +++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 6b0993258e039..c1f2e5a03de96 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1742,50 +1742,44 @@ static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, int number, } } -static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor( - struct usb_host_interface *hostif, uint8_t jack_id) +/* return iJack for the corresponding jackID */ +static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id) { unsigned char *extra = hostif->extra; int extralen = hostif->extralen; + struct usb_descriptor_header *h; + struct usb_midi_out_jack_descriptor *outjd; + struct usb_midi_in_jack_descriptor *injd; + size_t sz; while (extralen > 4) { - struct usb_midi_in_jack_descriptor *injd = - (struct usb_midi_in_jack_descriptor *)extra; + h = (struct usb_descriptor_header *)extra; + if (h->bDescriptorType != USB_DT_CS_INTERFACE) + goto next; + outjd = (struct usb_midi_out_jack_descriptor *)h; + if (h->bLength >= sizeof(*outjd) && + outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && + outjd->bJackID == jack_id) { + sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); + if (outjd->bLength < sz) + goto next; + return *(extra + sz - 1); + } + + injd = (struct usb_midi_in_jack_descriptor *)h; if (injd->bLength >= sizeof(*injd) && - injd->bDescriptorType == USB_DT_CS_INTERFACE && injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && - injd->bJackID == jack_id) - return injd; - if (!extra[0]) - break; - extralen -= extra[0]; - extra += extra[0]; - } - return NULL; -} - -static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor( - struct usb_host_interface *hostif, uint8_t jack_id) -{ - unsigned char *extra = hostif->extra; - int extralen = hostif->extralen; + injd->bJackID == jack_id) + return injd->iJack; - while (extralen > 4) { - struct usb_midi_out_jack_descriptor *outjd = - (struct usb_midi_out_jack_descriptor *)extra; - - if (outjd->bLength >= sizeof(*outjd) && - outjd->bDescriptorType == USB_DT_CS_INTERFACE && - outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && - outjd->bJackID == jack_id) - return outjd; +next: if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } - return NULL; + return 0; } static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, @@ -1796,13 +1790,10 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, const char *name_format; struct usb_interface *intf; struct usb_host_interface *hostif; - struct usb_midi_in_jack_descriptor *injd; - struct usb_midi_out_jack_descriptor *outjd; uint8_t jack_name_buf[32]; uint8_t *default_jack_name = "MIDI"; uint8_t *jack_name = default_jack_name; uint8_t iJack; - size_t sz; int res; struct snd_rawmidi_substream *substream = @@ -1816,21 +1807,7 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, intf = umidi->iface; if (intf && jack_id >= 0) { hostif = intf->cur_altsetting; - iJack = 0; - if (stream != SNDRV_RAWMIDI_STREAM_OUTPUT) { - /* in jacks connect to outs */ - outjd = find_usb_out_jack_descriptor(hostif, jack_id); - if (outjd) { - sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); - if (outjd->bLength >= sz) - iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); - } - } else { - /* and out jacks connect to ins */ - injd = find_usb_in_jack_descriptor(hostif, jack_id); - if (injd) - iJack = injd->iJack; - } + iJack = find_usb_ijack(hostif, jack_id); if (iJack != 0) { res = usb_string(umidi->dev, iJack, jack_name_buf, ARRAY_SIZE(jack_name_buf)); -- GitLab From 5b8e3464071a4401978a91d2e9f0beca308996c2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:35 -0800 Subject: [PATCH 1106/1405] net: fill in MODULE_DESCRIPTION()s for xen-netback W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the Xen backend network module. Signed-off-by: Breno Leitao Acked-by: Paul Durrant Link: https://lore.kernel.org/r/20240214152741.670178-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/netback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index fab361a250d60..ef76850d9bcd2 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1778,5 +1778,6 @@ static void __exit netback_fini(void) } module_exit(netback_fini); +MODULE_DESCRIPTION("Xen backend network device module"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("xen-backend:vif"); -- GitLab From c0872309ac8432e309824ece24238e8fd2768ef8 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:36 -0800 Subject: [PATCH 1107/1405] net: fill in MODULE_DESCRIPTION()s for ieee802154/fakelb W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the IEEE 802.15.4 loopback driver. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240214152741.670178-3-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ieee802154/fakelb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 35e55f198e05c..2930141d7dd2d 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -259,4 +259,5 @@ static __exit void fake_remove_module(void) module_init(fakelb_init_module); module_exit(fake_remove_module); +MODULE_DESCRIPTION("IEEE 802.15.4 loopback driver"); MODULE_LICENSE("GPL"); -- GitLab From 44c1197bcef49bdf1021bef7cdb32520b2bc1ce4 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:37 -0800 Subject: [PATCH 1108/1405] net: fill in MODULE_DESCRIPTION()s for plip W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the PLIP (parallel port) network module Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240214152741.670178-4-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/plip/plip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index 40ce8abe69995..cc7d1113ece0e 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -1437,4 +1437,5 @@ static int __init plip_init (void) module_init(plip_init); module_exit(plip_cleanup_module); +MODULE_DESCRIPTION("PLIP (parallel port) network module"); MODULE_LICENSE("GPL"); -- GitLab From 4ad9e85874393eec74edf2d2c7b7c7ba11f78d20 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:38 -0800 Subject: [PATCH 1109/1405] net: fill in MODULE_DESCRIPTION()s for fddik/skfp W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the SysKonnect FDDI PCI module. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240214152741.670178-5-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/fddi/skfp/skfddi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 2b6a607ac0b78..a273362c9e703 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -153,6 +153,7 @@ static const struct pci_device_id skfddi_pci_tbl[] = { { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(pci, skfddi_pci_tbl); +MODULE_DESCRIPTION("SysKonnect FDDI PCI driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mirko Lindner "); -- GitLab From e1e5ef2aefc2ab77dacf77626f9f25f332ba91b7 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:39 -0800 Subject: [PATCH 1110/1405] net: fill in MODULE_DESCRIPTION()s for ppp W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the PPP modules. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240214152741.670178-6-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/ppp/bsd_comp.c | 1 + drivers/net/ppp/ppp_async.c | 1 + drivers/net/ppp/ppp_deflate.c | 1 + drivers/net/ppp/ppp_generic.c | 1 + drivers/net/ppp/ppp_synctty.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/net/ppp/bsd_comp.c b/drivers/net/ppp/bsd_comp.c index db0dc36d12e33..55954594e157e 100644 --- a/drivers/net/ppp/bsd_comp.c +++ b/drivers/net/ppp/bsd_comp.c @@ -1166,5 +1166,6 @@ static void __exit bsdcomp_cleanup(void) module_init(bsdcomp_init); module_exit(bsdcomp_cleanup); +MODULE_DESCRIPTION("PPP BSD-Compress compression module"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_BSD_COMPRESS)); diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 125793d8aefa7..c33c3db3cc089 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -87,6 +87,7 @@ struct asyncppp { static int flag_time = HZ; module_param(flag_time, int, 0); MODULE_PARM_DESC(flag_time, "ppp_async: interval between flagged packets (in clock ticks)"); +MODULE_DESCRIPTION("PPP async serial channel module"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_PPP); diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index e6d48e5c65a33..4d2ff63f2ee2f 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -630,6 +630,7 @@ static void __exit deflate_cleanup(void) module_init(deflate_init); module_exit(deflate_cleanup); +MODULE_DESCRIPTION("PPP Deflate compression module"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE)); MODULE_ALIAS("ppp-compress-" __stringify(CI_DEFLATE_DRAFT)); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 0193af2d31c9b..3dd52bf28f15b 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -3604,6 +3604,7 @@ EXPORT_SYMBOL(ppp_input_error); EXPORT_SYMBOL(ppp_output_wakeup); EXPORT_SYMBOL(ppp_register_compressor); EXPORT_SYMBOL(ppp_unregister_compressor); +MODULE_DESCRIPTION("Generic PPP layer driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV(PPP_MAJOR, 0); MODULE_ALIAS_RTNL_LINK("ppp"); diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 52d05ce4a2819..45bf59ac8f571 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -724,5 +724,6 @@ ppp_sync_cleanup(void) module_init(ppp_sync_init); module_exit(ppp_sync_cleanup); +MODULE_DESCRIPTION("PPP synchronous TTY channel module"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_SYNC_PPP); -- GitLab From 9de69f0e99585d84baab4fbb0b3b97c02cf069f1 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:40 -0800 Subject: [PATCH 1111/1405] net: fill in MODULE_DESCRIPTION()s for mdio_devres W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the PHY MDIO helpers. Suggested-by: Andrew Lunn Signed-off-by: Breno Leitao Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20240214152741.670178-7-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_devres.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c index 69b829e6ab35b..7fd3377dbd796 100644 --- a/drivers/net/phy/mdio_devres.c +++ b/drivers/net/phy/mdio_devres.c @@ -131,4 +131,5 @@ int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, EXPORT_SYMBOL(__devm_of_mdiobus_register); #endif /* CONFIG_OF_MDIO */ +MODULE_DESCRIPTION("Network MDIO bus devres helpers"); MODULE_LICENSE("GPL"); -- GitLab From 538b22e74287864014b3be7481042fb904a0cc0d Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 14 Feb 2024 07:27:41 -0800 Subject: [PATCH 1112/1405] net: fill in MODULE_DESCRIPTION()s for missing arcnet W=1 builds now warn if module is built without a MODULE_DESCRIPTION(). Add descriptions to the ARC modules. Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240214152741.670178-8-leitao@debian.org Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/arc-rawmode.c | 1 + drivers/net/arcnet/arc-rimi.c | 1 + drivers/net/arcnet/capmode.c | 1 + drivers/net/arcnet/com20020-pci.c | 1 + drivers/net/arcnet/com20020.c | 1 + drivers/net/arcnet/com20020_cs.c | 1 + drivers/net/arcnet/com90io.c | 1 + drivers/net/arcnet/com90xx.c | 1 + drivers/net/arcnet/rfc1051.c | 1 + drivers/net/arcnet/rfc1201.c | 1 + 10 files changed, 10 insertions(+) diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 8c651fdee039a..57f1729066f28 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -186,4 +186,5 @@ static void __exit arcnet_raw_exit(void) module_init(arcnet_raw_init); module_exit(arcnet_raw_exit); +MODULE_DESCRIPTION("ARCnet raw mode packet interface module"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 8c3ccc7c83cd3..53d10a04d1bd0 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -312,6 +312,7 @@ module_param(node, int, 0); module_param(io, int, 0); module_param(irq, int, 0); module_param_string(device, device, sizeof(device), 0); +MODULE_DESCRIPTION("ARCnet COM90xx RIM I chipset driver"); MODULE_LICENSE("GPL"); static struct net_device *my_dev; diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index c09b567845e1e..7a0a799737698 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -265,4 +265,5 @@ static void __exit capmode_module_exit(void) module_init(capmode_module_init); module_exit(capmode_module_exit); +MODULE_DESCRIPTION("ARCnet CAP mode packet interface module"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index 7b5c8bb02f119..c5e571ec94c99 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -61,6 +61,7 @@ module_param(timeout, int, 0); module_param(backplane, int, 0); module_param(clockp, int, 0); module_param(clockm, int, 0); +MODULE_DESCRIPTION("ARCnet COM20020 chipset PCI driver"); MODULE_LICENSE("GPL"); static void led_tx_set(struct led_classdev *led_cdev, diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c index 06e1651b594ba..a0053e3992a36 100644 --- a/drivers/net/arcnet/com20020.c +++ b/drivers/net/arcnet/com20020.c @@ -399,6 +399,7 @@ EXPORT_SYMBOL(com20020_found); EXPORT_SYMBOL(com20020_netdev_ops); #endif +MODULE_DESCRIPTION("ARCnet COM20020 chipset core driver"); MODULE_LICENSE("GPL"); #ifdef MODULE diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index dc3253b318daf..75f08aa7528b4 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -97,6 +97,7 @@ module_param(backplane, int, 0); module_param(clockp, int, 0); module_param(clockm, int, 0); +MODULE_DESCRIPTION("ARCnet COM20020 chipset PCMCIA driver"); MODULE_LICENSE("GPL"); /*====================================================================*/ diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 37b47749fc8b4..3b463fbc64021 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -350,6 +350,7 @@ static char device[9]; /* use eg. device=arc1 to change name */ module_param_hw(io, int, ioport, 0); module_param_hw(irq, int, irq, 0); module_param_string(device, device, sizeof(device), 0); +MODULE_DESCRIPTION("ARCnet COM90xx IO mapped chipset driver"); MODULE_LICENSE("GPL"); #ifndef MODULE diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index f49dae1942846..b3b287c165617 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -645,6 +645,7 @@ static void com90xx_copy_from_card(struct net_device *dev, int bufnum, TIME(dev, "memcpy_fromio", count, memcpy_fromio(buf, memaddr, count)); } +MODULE_DESCRIPTION("ARCnet COM90xx normal chipset driver"); MODULE_LICENSE("GPL"); static int __init com90xx_init(void) diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index a7752a5b647fc..46519ca63a0aa 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -78,6 +78,7 @@ static void __exit arcnet_rfc1051_exit(void) module_init(arcnet_rfc1051_init); module_exit(arcnet_rfc1051_exit); +MODULE_DESCRIPTION("ARCNet packet format (RFC 1051) module"); MODULE_LICENSE("GPL"); /* Determine a packet's protocol ID. diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index a4c856282674b..0edf35d971c56 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -35,6 +35,7 @@ #include "arcdevice.h" +MODULE_DESCRIPTION("ARCNet packet format (RFC 1201) module"); MODULE_LICENSE("GPL"); static __be16 type_trans(struct sk_buff *skb, struct net_device *dev); -- GitLab From 9b6326354cf9a41521b79287da3bfab022ae0b6d Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 14 Feb 2024 23:05:56 +0100 Subject: [PATCH 1113/1405] tracing/synthetic: Fix trace_string() return value Fix trace_string() by assigning the string length to the return variable which got lost in commit ddeea494a16f ("tracing/synthetic: Use union instead of casts") and caused trace_string() to always return 0. Link: https://lore.kernel.org/linux-trace-kernel/20240214220555.711598-1-thorsten.blum@toblux.com Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: ddeea494a16f ("tracing/synthetic: Use union instead of casts") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Thorsten Blum Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e7af286af4f1a..c82b401a294d9 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry, if (is_dynamic) { union trace_synth_field *data = &entry->fields[*n_u64]; + len = fetch_store_strlen((unsigned long)str_val); data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size; - data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val); + data->as_dynamic.len = len; ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry); -- GitLab From 8c7bfd8262319fd3f127a5380f593ea76f1b88a2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 13 Feb 2024 09:23:40 -0800 Subject: [PATCH 1114/1405] drm/msm: Wire up tlb ops The brute force iommu_flush_iotlb_all() was good enough for unmap, but in some cases a map operation could require removing a table pte entry to replace with a block entry. This also requires tlb invalidation. Missing this was resulting an obscure iova fault on what should be a valid buffer address. Thanks to Robin Murphy for helping me understand the cause of the fault. Cc: Robin Murphy Cc: stable@vger.kernel.org Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/578117/ --- drivers/gpu/drm/msm/msm_iommu.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 5cc8d358cc975..d5512037c38bc 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; -- GitLab From 8a566f94104df87a067458351675129bb4e1ece2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 16:22:55 +0200 Subject: [PATCH 1115/1405] seq_buf: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20240215142255.400264-1-andriy.shevchenko@linux.intel.com Cc: "Matthew Wilcox (Oracle)" Cc: Andrew Morton Signed-off-by: Andy Shevchenko Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 5 ++++- lib/seq_buf.c | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index c44f4b47b9453..07b26e7510601 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include +#include +#include +#include +#include /* * Trace sequences are used to allow a function to call several other functions diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 010c730ca7fca..dfbfdc497d85a 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -13,9 +13,19 @@ * seq_buf_init() more than once to reset the seq_buf to start * from scratch. */ -#include -#include + +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include /** * seq_buf_can_fit - can the new data fit in the current buffer? -- GitLab From 6efe4d18796934b8ada66c1c446510e7f2d9b972 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Feb 2024 17:25:06 +0200 Subject: [PATCH 1116/1405] seq_buf: Fix kernel documentation There are plenty of issues with the kernel documentation here: - misspelled word "sequence" - different style of returned value descriptions - missed Return sections - unaligned style of ASCII / NUL-terminated / etc - wrong function references Fix all these. Link: https://lkml.kernel.org/r/20240215152506.598340-1-andriy.shevchenko@linux.intel.com Cc: Andrew Morton Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Signed-off-by: Steven Rostedt (Google) --- include/linux/seq_buf.h | 12 ++++++------ lib/seq_buf.c | 35 ++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 07b26e7510601..fe41da0059700 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -13,7 +13,7 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer @@ -80,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_str - get %NUL-terminated C string from seq_buf + * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -93,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * After this function is called, s->buffer is safe to use * in string operations. * - * Returns @s->buf after making sure it is terminated. + * Returns: @s->buf after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { @@ -113,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -135,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index dfbfdc497d85a..f3f3436d60a94 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -32,7 +32,7 @@ * @s: the seq_buf descriptor * @len: The length to see if it can fit in the current buffer * - * Returns true if there's enough unused space in the seq_buf buffer + * Returns: true if there's enough unused space in the seq_buf buffer * to fit the amount of new data according to @len. */ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) @@ -45,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) * @m: the seq_file descriptor that is the destination * @s: the seq_buf descriptor that is the source. * - * Returns zero on success, non zero otherwise + * Returns: zero on success, non-zero otherwise. */ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) { @@ -60,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) * @fmt: printf format string * @args: va_list of arguments from a printf() type function * - * Writes a vnprintf() format into the sequencce buffer. + * Writes a vnprintf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) { @@ -88,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) * * Writes a printf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) { @@ -104,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(seq_buf_printf); /** - * seq_buf_do_printk - printk seq_buf line by line + * seq_buf_do_printk - printk() seq_buf line by line * @s: seq_buf descriptor * @lvl: printk level * * printk()-s a multi-line sequential buffer line by line. The function - * makes sure that the buffer in @s is nul terminated and safe to read + * makes sure that the buffer in @s is NUL-terminated and safe to read * as a string. */ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk); * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) { @@ -177,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) * * Copy a simple string into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_puts(struct seq_buf *s, const char *str) { @@ -206,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts); * * Copy a single character into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putc(struct seq_buf *s, unsigned char c) { @@ -222,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c) EXPORT_SYMBOL_GPL(seq_buf_putc); /** - * seq_buf_putmem - write raw data into the sequenc buffer + * seq_buf_putmem - write raw data into the sequence buffer * @s: seq_buf descriptor * @mem: The raw memory to copy into the buffer * @len: The length of the raw memory to copy (in bytes) @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc); * buffer and a strcpy() would not work. Using this function allows * for such cases. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) { @@ -259,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) * raw memory into the buffer it writes its ASCII representation of it * in hex characters. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len) @@ -307,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, * * Write a path name into the sequence buffer. * - * Returns the number of written bytes on success, -1 on overflow + * Returns: the number of written bytes on success, -1 on overflow. */ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) { @@ -342,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) * or until it reaches the end of the content in the buffer (@s->len), * whichever comes first. * + * Returns: * On success, it returns a positive number of the number of bytes * it copied. * @@ -392,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) * linebuf size is maximal length for one line. * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for * separating space - * 2 - spaces separating hex dump and ascii representation - * 32 - ascii representation + * 2 - spaces separating hex dump and ASCII representation + * 32 - ASCII representation * 1 - terminating '\0' * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, -- GitLab From 68fb3ca0e408e00db1c3f8fccdfa19e274c033be Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Feb 2024 11:14:33 -0800 Subject: [PATCH 1117/1405] update workarounds for gcc "asm goto" issue In commit 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") I did the gcc workaround unconditionally, because the cause of the bad code generation wasn't entirely clear. In the meantime, Jakub Jelinek debugged the issue, and has come up with a fix in gcc [2], which also got backported to the still maintained branches of gcc-11, gcc-12 and gcc-13. Note that while the fix technically wasn't in the original gcc-14 branch, Jakub says: "while it is true that no GCC 14 snapshots until today (or whenever the fix will be committed) have the fix, for GCC trunk it is up to the distros to use the latest snapshot if they use it at all and would allow better testing of the kernel code without the workaround, so that if there are other issues they won't be discovered years later. Most userland code doesn't actually use asm goto with outputs..." so we will consider gcc-14 to be fixed - if somebody is using gcc snapshots of the gcc-14 before the fix, they should upgrade. Note that while the bug goes back to gcc-11, in practice other gcc changes seem to have effectively hidden it since gcc-12.1 as per a bisect by Jakub. So even a gcc-14 snapshot without the fix likely doesn't show actual problems. Also, make the default 'asm_goto_output()' macro mark the asm as volatile by hand, because of an unrelated gcc issue [1] where it doesn't match the documented behavior ("asm goto is always volatile"). Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 [1] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 [2] Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/ Requested-by: Jakub Jelinek Cc: Uros Bizjak Cc: Nick Desaulniers Cc: Sean Christopherson Cc: Andrew Pinski Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 7 ++++--- include/linux/compiler_types.h | 9 ++++++++- init/Kconfig | 9 +++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c1a963be7d289..75bd1692d2e37 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -67,10 +67,9 @@ /* * GCC 'asm goto' with outputs miscompiles certain code sequences: * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420 - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422 + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 * - * Work it around via the same compiler barrier quirk that we used + * Work around it via the same compiler barrier quirk that we used * to use for the old 'asm goto' workaround. * * Also, always mark such 'asm goto' statements as volatile: all @@ -80,8 +79,10 @@ * * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 */ +#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND #define asm_goto_output(x...) \ do { asm volatile goto(x); asm (""); } while (0) +#endif #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 663d8791c871a..0caf354cb94b5 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -362,8 +362,15 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* + * Some versions of gcc do not mark 'asm goto' volatile: + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103979 + * + * We do it here by hand, because it doesn't hurt. + */ #ifndef asm_goto_output -#define asm_goto_output(x...) asm goto(x) +#define asm_goto_output(x...) asm volatile goto(x) #endif #ifdef CONFIG_CC_HAS_ASM_INLINE diff --git a/init/Kconfig b/init/Kconfig index deda3d14135bb..8426d59cc634d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -89,6 +89,15 @@ config CC_HAS_ASM_GOTO_TIED_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) +config GCC_ASM_GOTO_OUTPUT_WORKAROUND + bool + depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT + # Fixed in GCC 14, 13.3, 12.4 and 11.5 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) -- GitLab From d16df040c8dad25c962b4404d2d534bfea327c6a Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 8 Feb 2024 16:23:29 -0500 Subject: [PATCH 1118/1405] drm/amdgpu: make damage clips support configurable We have observed that there are quite a number of PSR-SU panels on the market that are unable to keep up with what user space throws at them, resulting in hangs and random black screens. So, make damage clips support configurable and disable it by default for PSR-SU displays. Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6dce81a061ab1..517117a0796fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -200,6 +200,7 @@ extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; +extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 211501ea91694..586f4d03039df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; +int amdgpu_damage_clips = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -859,6 +860,18 @@ int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); module_param_named(backlight, amdgpu_backlight, bint, 0444); +/** + * DOC: damageclips (int) + * Enable or disable damage clips support. If damage clips support is disabled, + * we will force full frame updates, irrespective of what user space sends to + * us. + * + * Defaults to -1 (where it is enabled unless a PSR-SU display is detected). + */ +MODULE_PARM_DESC(damageclips, + "Damage clips support (0 = disable, 1 = enable, -1 auto (default))"); +module_param_named(damageclips, amdgpu_damage_clips, int, 0444); + /** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 59d2eee72a329..d5ef07af99066 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5219,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, struct drm_plane_state *new_plane_state, struct drm_crtc_state *crtc_state, struct dc_flip_addrs *flip_addrs, + bool is_psr_su, bool *dirty_regions_changed) { struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -5243,6 +5244,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); + if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 && + is_psr_su))) + goto ffu; + if (!dm_crtc_state->mpo_requested) { if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) goto ffu; @@ -8298,6 +8303,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count], + acrtc_state->stream->link->psr_settings.psr_version == + DC_PSR_VERSION_SU_1, &dirty_rects_changed); /* -- GitLab From a0c9956a8d5a808c173028f1e388377a890a2fdb Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Tue, 6 Feb 2024 12:45:44 -0500 Subject: [PATCH 1119/1405] drm/amdkfd: Fix L2 cache size reporting in GFX9.4.3 Its currently incorrectly multiplied by number of XCCs in the partition Fixes: be457b2252b6 ("drm/amdkfd: Update cache info for GFX 9.4.3") Signed-off-by: Kent Russell Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e5f7c92eebcbb..6ed2ec381aaa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1638,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, else mode = UNKNOWN_MEMORY_PARTITION_MODE; - if (pcache->cache_level == 2) - pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; - else if (mode) - pcache->cache_size = pcache_info[cache_type].cache_size / mode; - else - pcache->cache_size = pcache_info[cache_type].cache_size; + pcache->cache_size = pcache_info[cache_type].cache_size; + /* Partition mode only affects L3 cache size */ + if (mode && pcache->cache_level == 3) + pcache->cache_size /= mode; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; -- GitLab From 17ba9cde11c2bfebbd70867b0a2ac4a22e573379 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Feb 2024 16:02:42 +0300 Subject: [PATCH 1120/1405] drm/amd/display: Fix && vs || typos These ANDs should be ORs or it will lead to a NULL dereference. Fixes: fb5a3d037082 ("drm/amd/display: Add NULL test for 'timing generator' in 'dcn21_set_pipe()'") Fixes: 886571d217d7 ("drm/amd/display: Fix 'panel_cntl' could be null in 'dcn21_set_backlight_level()'") Reviewed-by: Anthony Koo Signed-off-by: Dan Carpenter Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 5c7f380a84f91..7252f5f781f0d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -211,7 +211,7 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; uint32_t otg_inst; - if (!abm && !tg && !panel_cntl) + if (!abm || !tg || !panel_cntl) return; otg_inst = tg->inst; @@ -245,7 +245,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; uint32_t otg_inst; - if (!abm && !tg && !panel_cntl) + if (!abm || !tg || !panel_cntl) return false; otg_inst = tg->inst; -- GitLab From 7edb5830ecb0033184ee2fa01ae8af17d56450ec Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 5 Feb 2024 16:07:36 +0530 Subject: [PATCH 1121/1405] drm/amd/display: Initialize 'wait_time_microsec' variable in link_dp_training_dpia.c wait_time_microsec = max(wait_time_microsec, (uint32_t) DPIA_CLK_SYNC_DELAY); Above line is trying to assign the maximum value between 'wait_time_microsec' and 'DPIA_CLK_SYNC_DELAY' to wait_time_microsec. However, 'wait_time_microsec' has not been assigned a value before this line, initialize 'wait_time_microsec' at the point of declaration. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_dpia.c:697 dpia_training_eq_non_transparent() error: uninitialized symbol 'wait_time_microsec'. Fixes: 630168a97314 ("drm/amd/display: move dp link training logic to link_dp_training") Cc: Wenjing Liu Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/link/protocols/link_dp_training_dpia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index e8dda44b23cb2..5d36bab0029ca 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent( uint32_t retries_eq = 0; enum dc_status status; enum dc_dp_training_pattern tr_pattern; - uint32_t wait_time_microsec; + uint32_t wait_time_microsec = 0; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; union lane_align_status_updated dpcd_lane_status_updated = {0}; union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; -- GitLab From 88c6d84dd8f70e498f89972449e6ebb7aa1309c0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 5 Feb 2024 15:07:02 +0530 Subject: [PATCH 1122/1405] drm/amd/display: Fix possible use of uninitialized 'max_chunks_fbc_mode' in 'calculate_bandwidth()' 'max_chunks_fbc_mode' is only declared and assigned a value under a specific condition in the following lines: if (data->fbc_en[i] == 1) { max_chunks_fbc_mode = 128 - dmif_chunk_buff_margin; } If 'data->fbc_en[i]' is not equal to 1 for any i, max_chunks_fbc_mode will not be initialized if it's used outside of this for loop. Ensure that 'max_chunks_fbc_mode' is properly initialized before it's used. Initialize it to a default value right after its declaration to ensure that it gets a value assigned under all possible control flow paths. Thus fixing the below: drivers/gpu/drm/amd/amdgpu/../display/dc/basics/dce_calcs.c:914 calculate_bandwidth() error: uninitialized symbol 'max_chunks_fbc_mode'. drivers/gpu/drm/amd/amdgpu/../display/dc/basics/dce_calcs.c:917 calculate_bandwidth() error: uninitialized symbol 'max_chunks_fbc_mode'. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Cc: Harry Wentland Cc: Alex Deucher Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index f2dfa96f9ef5d..39530b2ea4957 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -94,7 +94,7 @@ static void calculate_bandwidth( const uint32_t s_high = 7; const uint32_t dmif_chunk_buff_margin = 1; - uint32_t max_chunks_fbc_mode; + uint32_t max_chunks_fbc_mode = 0; int32_t num_cursor_lines; int32_t i, j, k; -- GitLab From ccc514b7e7acbd301219cbaec0fc0bfe5741acee Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 5 Feb 2024 16:54:10 +0530 Subject: [PATCH 1123/1405] drm/amd/display: Fix possible buffer overflow in 'find_dcfclk_for_voltage()' when 'find_dcfclk_for_voltage()' function is looping over VG_NUM_SOC_VOLTAGE_LEVELS (which is 8), but the size of the DcfClocks array is VG_NUM_DCFCLK_DPM_LEVELS (which is 7). When the loop variable i reaches 7, the function tries to access clock_table->DcfClocks[7]. However, since the size of the DcfClocks array is 7, the valid indices are 0 to 6. Index 7 is beyond the size of the array, leading to a buffer overflow. Reported by smatch & thus fixing the below: drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn301/vg_clk_mgr.c:550 find_dcfclk_for_voltage() error: buffer overflow 'clock_table->DcfClocks' 7 <= 7 Fixes: 3a83e4e64bb1 ("drm/amd/display: Add dcn3.01 support to DC (v2)") Cc: Roman Li Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index a5489fe6875f4..aa9fd1dc550a5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta int i; for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) { + if (i >= VG_NUM_DCFCLK_DPM_LEVELS) + break; if (clock_table->SocVoltage[i] == voltage) return clock_table->DcfClocks[i]; } -- GitLab From 3a9626c816db901def438dc2513622e281186d39 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Feb 2024 23:52:55 -0600 Subject: [PATCH 1124/1405] drm/amd: Stop evicting resources on APUs in suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") intentionally moved the eviction of resources to earlier in the suspend process, but this introduced a subtle change that it occurs before adev->in_s0ix or adev->in_s3 are set. This meant that APUs actually started to evict resources at suspend time as well. Explicitly set s0ix or s3 in the prepare() stage, and unset them if the prepare() stage failed. v2: squash in warning fix from Stephen Rothwell Reported-by: Jürg Billeter Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Acked-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 517117a0796fd..79827a6dcd7f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1550,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } +static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2deebece810e7..cc21ed67a3307 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1519,4 +1519,19 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } +/** + * amdgpu_choose_low_power_state + * + * @adev: amdgpu_device_pointer + * + * Choose the target low power state for the GPU + */ +void amdgpu_choose_low_power_state(struct amdgpu_device *adev) +{ + if (amdgpu_acpi_is_s0ix_active(adev)) + adev->in_s0ix = true; + else if (amdgpu_acpi_is_s3_active(adev)) + adev->in_s3 = true; +} + #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fdde7488d0ed9..d4b3d044935c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4514,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; + amdgpu_choose_low_power_state(adev); + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4529,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } return 0; + +unprepare: + adev->in_s0ix = adev->in_s3 = false; + + return r; } /** -- GitLab From 916361685319098f696b798ef1560f69ed96e934 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Feb 2024 23:52:54 -0600 Subject: [PATCH 1125/1405] Revert "drm/amd: flush any delayed gfxoff on suspend entry" commit ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") caused GFXOFF control to be used more heavily and the codepath that was removed from commit 0dee72639533 ("drm/amd: flush any delayed gfxoff on suspend entry") now can be exercised at suspend again. Users report that by using GNOME to suspend the lockscreen trigger will cause SDMA traffic and the system can deadlock. This reverts commit 0dee726395333fea833eaaf838bc80962df886c8. Acked-by: Alex Deucher Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d4b3d044935c9..94bdb5fa6ebc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4576,7 +4576,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b9674c57c4365..6ddc8e3360e22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { -- GitLab From 94b38b895dec8c0ef093140a141e191b60ff614c Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Mon, 29 Jan 2024 17:02:18 +0800 Subject: [PATCH 1126/1405] drm/amd/display: Add align done check [WHY] We Double-check link status if training successful, but miss the lane align status. [HOW] Add the lane align status check Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu Acked-by: Aurabindo Pillai Signed-off-by: Zhikai Zhai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 5a0b045189569..16a62e0187122 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status( { enum link_training_result status = LINK_TRAINING_SUCCESS; union lane_status lane_status; + union lane_align_status_updated dpcd_lane_status_updated; uint8_t dpcd_buf[6] = {0}; uint32_t lane; @@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status( * check lanes status */ lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane); + dpcd_lane_status_updated.raw = dpcd_buf[4]; if (!lane_status.bits.CHANNEL_EQ_DONE_0 || !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { + !lane_status.bits.SYMBOL_LOCKED_0 || + !dp_is_interlane_aligned(dpcd_lane_status_updated)) { /* if one of the channel equalization, clock * recovery or symbol lock is dropped * consider it as (link has been -- GitLab From a538dabf772c169641e151834e161e241802ab33 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Mon, 29 Jan 2024 17:33:40 -0500 Subject: [PATCH 1127/1405] Revert "drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz" [why]: This reverts commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a. The commit caused corruption when running some applications in fullscreen Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index ba76dd4a2ce29..a0a65e0991041 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2760,7 +2760,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; -- GitLab From a589fa17cc4456df75f16fa3b49e8da0112e5100 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 6 Feb 2024 09:34:25 +0530 Subject: [PATCH 1128/1405] drm/amd/display: Fix possible NULL dereference on device remove/driver unload As part of a cleanup amdgpu_dm_fini() function, which is typically called when a device is being shut down or a driver is being unloaded The below error message suggests that there is a potential null pointer dereference issue with adev->dm.dc. In the below, line of code where adev->dm.dc is used without a preceding null check: for (i = 0; i < adev->dm.dc->caps.max_links; i++) { To fix this issue, add a null check for adev->dm.dc before this line. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:1959 amdgpu_dm_fini() error: we previously assumed 'adev->dm.dc' could be null (see line 1943) Fixes: 006c26a0f1c8 ("drm/amd/display: Fix crash on device remove/driver unload") Cc: Andrey Grodzovsky Cc: Harry Wentland Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5ef07af99066..e0f121b221f52 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1956,7 +1956,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) &adev->dm.dmub_bo_gpu_addr, &adev->dm.dmub_bo_cpu_addr); - if (adev->dm.hpd_rx_offload_wq) { + if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) { for (i = 0; i < adev->dm.dc->caps.max_links; i++) { if (adev->dm.hpd_rx_offload_wq[i].wq) { destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq); -- GitLab From deb110292180cd501f6fde2a0178d65fcbcabb0c Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Tue, 30 Jan 2024 15:34:08 +0800 Subject: [PATCH 1129/1405] drm/amd/display: Preserve original aspect ratio in create stream [Why] The original picture aspect ratio in mode struct may have chance be overwritten with wrong aspect ratio data in create_stream_for_sink(). It will create a different VIC output and cause HDMI compliance test failed. [How] Preserve the original picture aspect ratio data during create the stream. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e0f121b221f52..cf875751971fe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6199,7 +6199,9 @@ create_stream_for_sink(struct drm_connector *connector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); -- GitLab From 46806e59a87790760870d216f54951a5b4d545bc Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 30 Jan 2024 18:07:24 -0500 Subject: [PATCH 1130/1405] drm/amd/display: Fix array-index-out-of-bounds in dcn35_clkmgr [Why] There is a potential memory access violation while iterating through array of dcn35 clks. [How] Limit iteration per array size. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 14cec1c7b718c..e648902592358 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -655,10 +655,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; + uint32_t num_memps, num_fclk, num_dcfclk; int i; /* Determine min/max p-state values. */ - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS : + clock_table->NumMemPstatesEnabled; + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { @@ -670,7 +673,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk min_dram_speed_mts = max_dram_speed_mts; min_pstate = max_pstate; - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { @@ -699,9 +702,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS : + clock_table->NumFclkLevelsEnabled; + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + clock_table->NumDcfClkLevelsEnabled; + for (i = 0; i < num_dcfclk; i++) { int j; /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ -- GitLab From 0484e05d048b66d01d1f3c1d2306010bb57d8738 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem Date: Wed, 31 Jan 2024 16:40:37 -0500 Subject: [PATCH 1131/1405] drm/amd/display: fixed integer types and null check locations [why]: issues fixed: - comparison with wider integer type in loop condition which can cause infinite loops - pointer dereference before null check Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Reviewed-by: Josip Pavic Acked-by: Aurabindo Pillai Signed-off-by: Sohaib Nadeem Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/bios/bios_parser2.c | 16 ++++++++++------ .../drm/amd/display/dc/link/link_validation.c | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 960c4b4f6ddf3..05f392501c0ae 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1850,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2( /* Vega12 */ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); if (!smu_info_v3_2) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; } else if (revision.minor == 3) { /* Vega20 */ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); if (!smu_info_v3_3) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; } @@ -2422,10 +2424,11 @@ static enum bp_result get_integrated_info_v11( info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); if (info_v11 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v11->gpucapinfo); /* @@ -2637,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1( info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); if (info_v2_1 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_1->gpucapinfo); /* @@ -2799,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2( info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); - if (info_v2_2 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_2->gpucapinfo); /* diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 8fe66c3678508..5b0bc7f6a188c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; int num_dpias = 0; - for (uint8_t i = 0; i < num_streams; ++i) { + for (unsigned int i = 0; i < num_streams; ++i) { if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { /* new dpia sst stream, check whether it exceeds max dpia */ if (num_dpias >= MAX_DPIA_NUM) -- GitLab From 2f542421a47e8246e9b7d2c6508fe3a6e6c63078 Mon Sep 17 00:00:00 2001 From: Thong Date: Tue, 6 Feb 2024 18:05:16 -0500 Subject: [PATCH 1132/1405] drm/amdgpu/soc21: update VCN 4 max HEVC encoding resolution Update the maximum resolution reported for HEVC encoding on VCN 4 devices to reflect its 8K encoding capability. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3159 Signed-off-by: Thong Reviewed-by: Ruijing Dong Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 48c6efcdeac97..4d7188912edfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs; /* SOC21 */ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = { -- GitLab From a82197e3a5f45450cbaf92095d8a51249dc44c79 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sun, 11 Feb 2024 09:04:19 +0530 Subject: [PATCH 1133/1405] drm/amdgpu/display: Initialize gamma correction mode variable in dcn30_get_gamcor_current() The dcn30_get_gamcor_current() function is responsible for determining the current gamma correction mode used by the display controller. However, the 'mode' variable, which stores the gamma correction mode, was not initialized before its first usage, leading to an uninitialized symbol error. Thus initializes the 'mode' variable with a default value of LUT_BYPASS before the conditional statements in the function, improves code clarity and stability, ensuring correct behavior of the dcn30_get_gamcor_current() function in determining the gamma correction mode. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_dpp_cm.c:77 dpp30_get_gamcor_current() error: uninitialized symbol 'mode'. Fixes: 03f54d7d3448 ("drm/amd/display: Add DCN3 DPP") Cc: Bhawanpreet Lakha Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Suggested-by: Roman Li Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c index e43f77c11c008..5f97a868ada34 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c @@ -56,16 +56,13 @@ static void dpp3_enable_cm_block( static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base) { - enum dc_lut_mode mode; + enum dc_lut_mode mode = LUT_BYPASS; uint32_t state_mode; uint32_t lut_mode; struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode); - if (state_mode == 0) - mode = LUT_BYPASS; - if (state_mode == 2) {//Programmable RAM LUT REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode); if (lut_mode == 0) -- GitLab From 0d555e481c1333c8ae170198ca111947c22fc9c9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 23 Jan 2024 12:20:06 -0500 Subject: [PATCH 1134/1405] drm/amd/display: Increase ips2_eval delay for DCN35 [Why] New worst-case measurement observed at 1897us. [How] Increase to 2000us to cover the new worst case + margin. Reviewed-by: Ovidiu Bunea Acked-by: Aurabindo Pillai Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 1c3d89264ef72..5fdcda8f86026 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -780,7 +780,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 1650, + .ips2_eval_delay_us = 2000, .ips2_entry_delay_us = 800, .static_screen_wait_frames = 2, }; -- GitLab From e3de58f8fd5bda8685bb87bf7457bbc10479765b Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 31 Jan 2024 19:33:49 -0500 Subject: [PATCH 1135/1405] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards In certain cooperative group dispatch scenarios the default SPI resource allocation may cause reduced per-CU workgroup occupancy. Set COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang scenarions. Reviewed-by: Felix Kuehling Suggested-by: Joseph Greathouse Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 +++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 42d881809dc70..697b6d530d12e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); + if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) { + if (minfo->update_flag & UPDATE_FLAG_IS_GWS) + m->compute_resource_limits |= + COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + else + m->compute_resource_limits &= + ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + } + q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 677281c0793e2..80320b8603fc6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -532,6 +532,7 @@ struct queue_properties { enum mqd_update_flag { UPDATE_FLAG_DBG_WA_ENABLE = 1, UPDATE_FLAG_DBG_WA_DISABLE = 2, + UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */ }; struct mqd_update_info { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 43eff221eae58..4858112f9a53b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { + struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; @@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; + minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) -- GitLab From a8ac4bcaeb660c5eeb273507e8dbf713ba56de44 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 9 Feb 2024 20:23:19 -0500 Subject: [PATCH 1136/1405] drm/amdgpu: Fix implicit assumtion in gfx11 debug flags Gfx11 debug flags mask is currently set with an implicit assumption that no other mqd update flags exist. This needs to be fixed with newly introduced flag UPDATE_FLAG_IS_GWS by the previous patch. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index d722cbd317834..826bc4f6c8a70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); if (has_wa_flag) { - uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ? - 0xffff : 0xffffffff; + uint32_t wa_mask = + (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff; m->compute_static_thread_mgmt_se0 = wa_mask; m->compute_static_thread_mgmt_se1 = wa_mask; -- GitLab From b5fc07a5fb56216a49e6c1d0b172d5464d99a89b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 14 Feb 2024 17:14:11 -0500 Subject: [PATCH 1137/1405] scsi: core: Consult supported VPD page list prior to fetching page Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") removed the logic which checks whether a VPD page is present on the supported pages list before asking for the page itself. That was done because SPC helpfully states "The Supported VPD Pages VPD page list may or may not include all the VPD pages that are able to be returned by the device server". Testing had revealed a few devices that supported some of the 0xBn pages but didn't actually list them in page 0. Julian Sikorski bisected a problem with his drive resetting during discovery to the commit above. As it turns out, this particular drive firmware will crash if we attempt to fetch page 0xB9. Various approaches were attempted to work around this. In the end, reinstating the logic that consults VPD page 0 before fetching any other page was the path of least resistance. A firmware update for the devices which originally compelled us to remove the check has since been released. Link: https://lore.kernel.org/r/20240214221411.2888112-1-martin.petersen@oracle.com Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page") Cc: stable@vger.kernel.org Cc: Bart Van Assche Reported-by: Julian Sikorski Tested-by: Julian Sikorski Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 22 ++++++++++++++++++++-- include/scsi/scsi_device.h | 4 ---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 76d369343c7a9..8cad9792a5627 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, return result + 4; } +enum scsi_vpd_parameters { + SCSI_VPD_HEADER_SIZE = 4, + SCSI_VPD_LIST_SIZE = 36, +}; + static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) { - unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4); + unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4); int result; if (sdev->no_vpd_size) return SCSI_DEFAULT_VPD_LEN; + /* + * Fetch the supported pages VPD and validate that the requested page + * number is present. + */ + if (page != 0) { + result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd)); + if (result < SCSI_VPD_HEADER_SIZE) + return 0; + + result -= SCSI_VPD_HEADER_SIZE; + if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result)) + return 0; + } /* * Fetch the VPD page header to find out how big the page * is. This is done to prevent problems on legacy devices * which can not handle allocation lengths as large as * potentially requested by the caller. */ - result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header)); + result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE); if (result < 0) return 0; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 01c02cb76ea6b..c38f4fe5e64cf 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -100,10 +100,6 @@ struct scsi_vpd { unsigned char data[]; }; -enum scsi_vpd_parameters { - SCSI_VPD_HEADER_SIZE = 4, -}; - struct scsi_device { struct Scsi_Host *host; struct request_queue *request_queue; -- GitLab From de959094eb2197636f7c803af0943cb9d3b35804 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Feb 2024 23:43:56 +0900 Subject: [PATCH 1138/1405] scsi: target: pscsi: Fix bio_put() for error case As of commit 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper"), a bio allocated by bio_kmalloc() must be freed by bio_uninit() and kfree(). That is not done properly for the error case, hitting WARN and NULL pointer dereference in bio_free(). Fixes: 066ff571011d ("block: turn bio_kmalloc into a simple kmalloc wrapper") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Naohiro Aota Link: https://lore.kernel.org/r/20240214144356.101814-1-naohiro.aota@wdc.com Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pscsi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 41b7489d37ce9..ed4fd22eac6e0 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -907,12 +907,15 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, return 0; fail: - if (bio) - bio_put(bio); + if (bio) { + bio_uninit(bio); + kfree(bio); + } while (req->bio) { bio = req->bio; req->bio = bio->bi_next; - bio_put(bio); + bio_uninit(bio); + kfree(bio); } req->biotail = NULL; return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -- GitLab From f2dced9d1992824d677593072bc20eccf66ac5d5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Feb 2024 21:08:09 +0300 Subject: [PATCH 1139/1405] scsi: ufs: Uninitialized variable in ufshcd_devfreq_target() There is one goto where "sched_clk_scaling_suspend_work" is true but "scale_up" is uninitialized. It leads to a Smatch uninitialized variable warning: drivers/ufs/core/ufshcd.c:1589 ufshcd_devfreq_target() error: uninitialized symbol 'scale_up'. Fixes: 1d969731b87f ("scsi: ufs: core: Only suspend clock scaling if scaling down") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/c787d37f-1107-4512-8991-bccf80e74a35@moroto.mountain Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d77b25b79ae3e..3b89c9d4aa404 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1469,7 +1469,7 @@ static int ufshcd_devfreq_target(struct device *dev, int ret = 0; struct ufs_hba *hba = dev_get_drvdata(dev); ktime_t start; - bool scale_up, sched_clk_scaling_suspend_work = false; + bool scale_up = false, sched_clk_scaling_suspend_work = false; struct list_head *clk_list = &hba->clk_list_head; struct ufs_clk_info *clki; unsigned long irq_flags; -- GitLab From 5761eb9761d2d5fe8248a9b719efc4d8baf1f24a Mon Sep 17 00:00:00 2001 From: Don Brace Date: Tue, 13 Feb 2024 10:22:00 -0600 Subject: [PATCH 1140/1405] scsi: smartpqi: Fix disable_managed_interrupts Correct blk-mq registration issue with module parameter disable_managed_interrupts enabled. When we turn off the default PCI_IRQ_AFFINITY flag, the driver needs to register with blk-mq using blk_mq_map_queues(). The driver is currently calling blk_mq_pci_map_queues() which results in a stack trace and possibly undefined behavior. Stack Trace: [ 7.860089] scsi host2: smartpqi [ 7.871934] WARNING: CPU: 0 PID: 238 at block/blk-mq-pci.c:52 blk_mq_pci_map_queues+0xca/0xd0 [ 7.889231] Modules linked in: sd_mod t10_pi sg uas smartpqi(+) crc32c_intel scsi_transport_sas usb_storage dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse [ 7.924755] CPU: 0 PID: 238 Comm: kworker/0:3 Not tainted 4.18.0-372.88.1.el8_6_smartpqi_test.x86_64 #1 [ 7.944336] Hardware name: HPE ProLiant DL380 Gen10/ProLiant DL380 Gen10, BIOS U30 03/08/2022 [ 7.963026] Workqueue: events work_for_cpu_fn [ 7.978275] RIP: 0010:blk_mq_pci_map_queues+0xca/0xd0 [ 7.978278] Code: 48 89 de 89 c7 e8 f6 0f 4f 00 3b 05 c4 b7 8e 01 72 e1 5b 31 c0 5d 41 5c 41 5d 41 5e 41 5f e9 7d df 73 00 31 c0 e9 76 df 73 00 <0f> 0b eb bc 90 90 0f 1f 44 00 00 41 57 49 89 ff 41 56 41 55 41 54 [ 7.978280] RSP: 0018:ffffa95fc3707d50 EFLAGS: 00010216 [ 7.978283] RAX: 00000000ffffffff RBX: 0000000000000000 RCX: 0000000000000010 [ 7.978284] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffff9190c32d4310 [ 7.978286] RBP: 0000000000000000 R08: ffffa95fc3707d38 R09: ffff91929b81ac00 [ 7.978287] R10: 0000000000000001 R11: ffffa95fc3707ac0 R12: 0000000000000000 [ 7.978288] R13: ffff9190c32d4000 R14: 00000000ffffffff R15: ffff9190c4c950a8 [ 7.978290] FS: 0000000000000000(0000) GS:ffff9193efc00000(0000) knlGS:0000000000000000 [ 7.978292] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.172814] CR2: 000055d11166c000 CR3: 00000002dae10002 CR4: 00000000007706f0 [ 8.172816] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 8.172817] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 8.172818] PKRU: 55555554 [ 8.172819] Call Trace: [ 8.172823] blk_mq_alloc_tag_set+0x12e/0x310 [ 8.264339] scsi_add_host_with_dma.cold.9+0x30/0x245 [ 8.279302] pqi_ctrl_init+0xacf/0xc8e [smartpqi] [ 8.294085] ? pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.309015] pqi_pci_probe+0x480/0x4c8 [smartpqi] [ 8.323286] local_pci_probe+0x42/0x80 [ 8.337855] work_for_cpu_fn+0x16/0x20 [ 8.351193] process_one_work+0x1a7/0x360 [ 8.364462] ? create_worker+0x1a0/0x1a0 [ 8.379252] worker_thread+0x1ce/0x390 [ 8.392623] ? create_worker+0x1a0/0x1a0 [ 8.406295] kthread+0x10a/0x120 [ 8.418428] ? set_kthread_struct+0x50/0x50 [ 8.431532] ret_from_fork+0x1f/0x40 [ 8.444137] ---[ end trace 1bf0173d39354506 ]--- Fixes: cf15c3e734e8 ("scsi: smartpqi: Add module param to disable managed ints") Tested-by: Yogesh Chandra Pandey Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Mahesh Rajashekhara Reviewed-by: Mike McGowen Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Link: https://lore.kernel.org/r/20240213162200.1875970-2-don.brace@microchip.com Reviewed-by: Tomas Henzl Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index ceff1ec13f9ea..385180c98be49 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6533,8 +6533,11 @@ static void pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + if (!ctrl_info->disable_managed_interrupts) + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], ctrl_info->pci_dev, 0); + else + return blk_mq_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT]); } static inline bool pqi_is_tape_changer_device(struct pqi_scsi_dev *device) -- GitLab From 9ddf190a7df77b77817f955fdb9c2ae9d1c9c9a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 13 Feb 2024 21:59:53 -0800 Subject: [PATCH 1141/1405] scsi: jazz_esp: Only build if SCSI core is builtin JAZZ_ESP is a bool kconfig symbol that selects SCSI_SPI_ATTRS. When CONFIG_SCSI=m, this results in SCSI_SPI_ATTRS=m while JAZZ_ESP=y, which causes many undefined symbol linker errors. Fix this by only offering to build this driver when CONFIG_SCSI=y. [mkp: JAZZ_ESP is unique in that it does not support being compiled as a module unlike the remaining SPI SCSI HBA drivers] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20240214055953.9612-1-rdunlap@infradead.org Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nicolas Schier Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: linux-scsi@vger.kernel.org Cc: Geert Uytterhoeven Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402112222.Gl0udKyU-lkp@intel.com/ Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index addac7fbe37b9..9ce27092729c3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1270,7 +1270,7 @@ source "drivers/scsi/arm/Kconfig" config JAZZ_ESP bool "MIPS JAZZ FAS216 SCSI support" - depends on MACH_JAZZ && SCSI + depends on MACH_JAZZ && SCSI=y select SCSI_SPI_ATTRS help This is the driver for the onboard SCSI host adapter of MIPS Magnum -- GitLab From 1baae052cccd08daf9a9d64c3f959d8cdb689757 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:46 +0800 Subject: [PATCH 1142/1405] md: Don't ignore suspended array in md_check_recovery() mddev_suspend() never stop sync_thread, hence it doesn't make sense to ignore suspended array in md_check_recovery(), which might cause sync_thread can't be unregistered. After commit f52f5c71f3d4 ("md: fix stopping sync thread"), following hang can be triggered by test shell/integrity-caching.sh: 1) suspend the array: raid_postsuspend mddev_suspend 2) stop the array: raid_dtr md_stop __md_stop_writes stop_sync_thread set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_wakeup_thread_directly(mddev->sync_thread); wait_event(..., !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3) sync thread done: md_do_sync set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); 4) daemon thread can't unregister sync thread: md_check_recovery if (mddev->suspended) return; -> return directly md_read_sync_thread clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); -> MD_RECOVERY_RUNNING can't be cleared, hence step 2 hang; This problem is not just related to dm-raid, fix it by ignoring suspended array in md_check_recovery(). And follow up patches will improve dm-raid better to frozen sync thread during suspend. Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/all/8fb335e-6d2c-dbb5-d7-ded8db5145a@redhat.com/ Fixes: 68866e425be2 ("MD: no sync IO while suspended") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-2-yukuai1@huaweicloud.com --- drivers/md/md.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e52896c140bdc..eea8dd529b5eb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9473,9 +9473,6 @@ static void md_start_sync(struct work_struct *ws) */ void md_check_recovery(struct mddev *mddev) { - if (READ_ONCE(mddev->suspended)) - return; - if (mddev->bitmap) md_bitmap_daemon_work(mddev); -- GitLab From 55a48ad2db64737f7ffc0407634218cc6e4c513b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:47 +0800 Subject: [PATCH 1143/1405] md: Don't ignore read-only array in md_check_recovery() Usually if the array is not read-write, md_check_recovery() won't register new sync_thread in the first place. And if the array is read-write and sync_thread is registered, md_set_readonly() will unregister sync_thread before setting the array read-only. md/raid follow this behavior hence there is no problem. After commit f52f5c71f3d4 ("md: fix stopping sync thread"), following hang can be triggered by test shell/integrity-caching.sh: 1) array is read-only. dm-raid update super block: rs_update_sbs ro = mddev->ro mddev->ro = 0 -> set array read-write md_update_sb 2) register new sync thread concurrently. 3) dm-raid set array back to read-only: rs_update_sbs mddev->ro = ro 4) stop the array: raid_dtr md_stop stop_sync_thread set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_wakeup_thread_directly(mddev->sync_thread); wait_event(..., !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 5) sync thread done: md_do_sync set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); 6) daemon thread can't unregister sync thread: md_check_recovery if (!md_is_rdwr(mddev) && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; -> -> MD_RECOVERY_RUNNING can't be cleared, hence step 4 hang; The root cause is that dm-raid manipulate 'mddev->ro' by itself, however, dm-raid really should stop sync thread before setting the array read-only. Unfortunately, I need to read more code before I can refacter the handler of 'mddev->ro' in dm-raid, hence let's fix the problem the easy way for now to prevent dm-raid regression. Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/all/9801e40-8ac7-e225-6a71-309dcf9dc9aa@redhat.com/ Fixes: ecbfb9f118bc ("dm raid: add raid level takeover support") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-3-yukuai1@huaweicloud.com --- drivers/md/md.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index eea8dd529b5eb..2a1c19f87d012 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9449,6 +9449,20 @@ static void md_start_sync(struct work_struct *ws) sysfs_notify_dirent_safe(mddev->sysfs_action); } +static void unregister_sync_thread(struct mddev *mddev) +{ + if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { + /* resync/recovery still happening */ + clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + return; + } + + if (WARN_ON_ONCE(!mddev->sync_thread)) + return; + + md_reap_sync_thread(mddev); +} + /* * This routine is regularly called by all per-raid-array threads to * deal with generic issues like resync and super-block update. @@ -9486,7 +9500,8 @@ void md_check_recovery(struct mddev *mddev) } if (!md_is_rdwr(mddev) && - !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) && + !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) return; if ( ! ( (mddev->sb_flags & ~ (1<recovery)) { - /* sync_work already queued. */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + unregister_sync_thread(mddev); goto unlock; } @@ -9572,16 +9586,7 @@ void md_check_recovery(struct mddev *mddev) * still set. */ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { - if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { - /* resync/recovery still happening */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - goto unlock; - } - - if (WARN_ON_ONCE(!mddev->sync_thread)) - goto unlock; - - md_reap_sync_thread(mddev); + unregister_sync_thread(mddev); goto unlock; } -- GitLab From 82ec0ae59d02e89164b24c0cc8e4e50de78b5fd6 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:48 +0800 Subject: [PATCH 1144/1405] md: Make sure md_do_sync() will set MD_RECOVERY_DONE stop_sync_thread() will interrupt md_do_sync(), and md_do_sync() must set MD_RECOVERY_DONE, so that follow up md_check_recovery() will unregister sync_thread, clear MD_RECOVERY_RUNNING and wake up stop_sync_thread(). If MD_RECOVERY_WAIT is set or the array is read-only, md_do_sync() will return without setting MD_RECOVERY_DONE, and after commit f52f5c71f3d4 ("md: fix stopping sync thread"), dm-raid switch from md_reap_sync_thread() to stop_sync_thread() to unregister sync_thread from md_stop() and md_stop_writes(), causing the test shell/lvconvert-raid-reshape.sh hang. We shouldn't switch back to md_reap_sync_thread() because it's problematic in the first place. Fix the problem by making sure md_do_sync() will set MD_RECOVERY_DONE. Reported-by: Mikulas Patocka Closes: https://lore.kernel.org/all/ece2b06f-d647-6613-a534-ff4c9bec1142@redhat.com/ Fixes: d5d885fd514f ("md: introduce new personality funciton start()") Fixes: 5fd6c1dce06e ("[PATCH] md: allow checkpoint of recovery with version-1 superblock") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-4-yukuai1@huaweicloud.com --- drivers/md/md.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2a1c19f87d012..fbf04160d0098 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8792,12 +8792,16 @@ void md_do_sync(struct md_thread *thread) int ret; /* just incase thread restarts... */ - if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - test_bit(MD_RECOVERY_WAIT, &mddev->recovery)) + if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) return; - if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */ + + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + goto skip; + + if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) || + !md_is_rdwr(mddev)) {/* never try to sync a read-only array */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); - return; + goto skip; } if (mddev_is_clustered(mddev)) { -- GitLab From ad39c08186f8a0f221337985036ba86731d6aafe Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:49 +0800 Subject: [PATCH 1145/1405] md: Don't register sync_thread for reshape directly Currently, if reshape is interrupted, then reassemble the array will register sync_thread directly from pers->run(), in this case 'MD_RECOVERY_RUNNING' is set directly, however, there is no guarantee that md_do_sync() will be executed, hence stop_sync_thread() will hang because 'MD_RECOVERY_RUNNING' can't be cleared. Last patch make sure that md_do_sync() will set MD_RECOVERY_DONE, however, following hang can still be triggered by dm-raid test shell/lvconvert-raid-reshape.sh occasionally: [root@fedora ~]# cat /proc/1982/stack [<0>] stop_sync_thread+0x1ab/0x270 [md_mod] [<0>] md_frozen_sync_thread+0x5c/0xa0 [md_mod] [<0>] raid_presuspend+0x1e/0x70 [dm_raid] [<0>] dm_table_presuspend_targets+0x40/0xb0 [dm_mod] [<0>] __dm_destroy+0x2a5/0x310 [dm_mod] [<0>] dm_destroy+0x16/0x30 [dm_mod] [<0>] dev_remove+0x165/0x290 [dm_mod] [<0>] ctl_ioctl+0x4bb/0x7b0 [dm_mod] [<0>] dm_ctl_ioctl+0x11/0x20 [dm_mod] [<0>] vfs_ioctl+0x21/0x60 [<0>] __x64_sys_ioctl+0xb9/0xe0 [<0>] do_syscall_64+0xc6/0x230 [<0>] entry_SYSCALL_64_after_hwframe+0x6c/0x74 Meanwhile mddev->recovery is: MD_RECOVERY_RUNNING | MD_RECOVERY_INTR | MD_RECOVERY_RESHAPE | MD_RECOVERY_FROZEN Fix this problem by remove the code to register sync_thread directly from raid10 and raid5. And let md_check_recovery() to register sync_thread. Fixes: f67055780caa ("[PATCH] md: Checkpoint and allow restart of raid5 reshape") Fixes: f52f5c71f3d4 ("md: fix stopping sync thread") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-5-yukuai1@huaweicloud.com --- drivers/md/md.c | 5 ++++- drivers/md/raid10.c | 16 ++-------------- drivers/md/raid5.c | 29 ++--------------------------- 3 files changed, 8 insertions(+), 42 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index fbf04160d0098..dcde67a816473 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9376,6 +9376,7 @@ static void md_start_sync(struct work_struct *ws) struct mddev *mddev = container_of(ws, struct mddev, sync_work); int spares = 0; bool suspend = false; + char *name; if (md_spares_need_change(mddev)) suspend = true; @@ -9408,8 +9409,10 @@ static void md_start_sync(struct work_struct *ws) if (spares) md_bitmap_write_all(mddev->bitmap); + name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ? + "reshape" : "resync"; rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "resync")); + md_register_thread(md_do_sync, mddev, name)); if (!mddev->sync_thread) { pr_warn("%s: could not start resync thread...\n", mdname(mddev)); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 7412066ea22c7..a5f8419e2df1d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto out_free_conf; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } return 0; @@ -4573,16 +4569,8 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - ret = -EAGAIN; - goto abort; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8497880135ee4..6a7a32f7fb912 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) - goto abort; + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); } /* Ok, everything is just fine now */ @@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - rcu_assign_pointer(mddev->sync_thread, - md_register_thread(md_do_sync, mddev, "reshape")); - if (!mddev->sync_thread) { - mddev->recovery = 0; - spin_lock_irq(&conf->device_lock); - write_seqcount_begin(&conf->gen_lock); - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; - mddev->new_chunk_sectors = - conf->chunk_sectors = conf->prev_chunk_sectors; - mddev->new_layout = conf->algorithm = conf->prev_algo; - rdev_for_each(rdev, mddev) - rdev->new_data_offset = rdev->data_offset; - smp_wmb(); - conf->generation --; - conf->reshape_progress = MaxSector; - mddev->reshape_position = MaxSector; - write_seqcount_end(&conf->gen_lock); - spin_unlock_irq(&conf->device_lock); - return -EAGAIN; - } + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); md_new_event(); return 0; } -- GitLab From 9e46c70e829bddc24e04f963471e9983a11598b7 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 1 Feb 2024 17:25:50 +0800 Subject: [PATCH 1146/1405] md: Don't suspend the array for interrupted reshape md_start_sync() will suspend the array if there are spares that can be added or removed from conf, however, if reshape is still in progress, this won't happen at all or data will be corrupted(remove_and_add_spares won't be called from md_choose_sync_action for reshape), hence there is no need to suspend the array if reshape is not done yet. Meanwhile, there is a potential deadlock for raid456: 1) reshape is interrupted; 2) set one of the disk WantReplacement, and add a new disk to the array, however, recovery won't start until the reshape is finished; 3) then issue an IO across reshpae position, this IO will wait for reshape to make progress; 4) continue to reshape, then md_start_sync() found there is a spare disk that can be added to conf, mddev_suspend() is called; Step 4 and step 3 is waiting for each other, deadlock triggered. Noted this problem is found by code review, and it's not reporduced yet. Fix this porblem by don't suspend the array for interrupted reshape, this is safe because conf won't be changed until reshape is done. Fixes: bc08041b32ab ("md: suspend array in md_start_sync() if array need reconfiguration") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20240201092559.910982-6-yukuai1@huaweicloud.com --- drivers/md/md.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index dcde67a816473..9e41a9aaba8b5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9378,12 +9378,17 @@ static void md_start_sync(struct work_struct *ws) bool suspend = false; char *name; - if (md_spares_need_change(mddev)) + /* + * If reshape is still in progress, spares won't be added or removed + * from conf until reshape is done. + */ + if (mddev->reshape_position == MaxSector && + md_spares_need_change(mddev)) { suspend = true; + mddev_suspend(mddev, false); + } - suspend ? mddev_suspend_and_lock_nointr(mddev) : - mddev_lock_nointr(mddev); - + mddev_lock_nointr(mddev); if (!md_is_rdwr(mddev)) { /* * On a read-only array we can: -- GitLab From 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 8 Feb 2024 17:26:59 +0900 Subject: [PATCH 1147/1405] zonefs: Improve error handling Write error handling is racy and can sometime lead to the error recovery path wrongly changing the inode size of a sequential zone file to an incorrect value which results in garbage data being readable at the end of a file. There are 2 problems: 1) zonefs_file_dio_write() updates a zone file write pointer offset after issuing a direct IO with iomap_dio_rw(). This update is done only if the IO succeed for synchronous direct writes. However, for asynchronous direct writes, the update is done without waiting for the IO completion so that the next asynchronous IO can be immediately issued. However, if an asynchronous IO completes with a failure right before the i_truncate_mutex lock protecting the update, the update may change the value of the inode write pointer offset that was corrected by the error path (zonefs_io_error() function). 2) zonefs_io_error() is called when a read or write error occurs. This function executes a report zone operation using the callback function zonefs_io_error_cb(), which does all the error recovery handling based on the current zone condition, write pointer position and according to the mount options being used. However, depending on the zoned device being used, a report zone callback may be executed in a context that is different from the context of __zonefs_io_error(). As a result, zonefs_io_error_cb() may be executed without the inode truncate mutex lock held, which can lead to invalid error processing. Fix both problems as follows: - Problem 1: Perform the inode write pointer offset update before a direct write is issued with iomap_dio_rw(). This is safe to do as partial direct writes are not supported (IOMAP_DIO_PARTIAL is not set) and any failed IO will trigger the execution of zonefs_io_error() which will correct the inode write pointer offset to reflect the current state of the one on the device. - Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() and call this function directly from __zonefs_io_error() after obtaining the zone information using blkdev_report_zones() with a simple callback function that copies to a local stack variable the struct blk_zone obtained from the device. This ensures that error handling is performed holding the inode truncate mutex. This change also simplifies error handling for conventional zone files by bypassing the execution of report zones entirely. This is safe to do because the condition of conventional zones cannot be read-only or offline and conventional zone files are always fully mapped with a constant file size. Reported-by: Shin'ichiro Kawasaki Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Tested-by: Shin'ichiro Kawasaki Reviewed-by: Johannes Thumshirn Reviewed-by: Himanshu Madhani --- fs/zonefs/file.c | 42 +++++++++++++++++++----------- fs/zonefs/super.c | 66 +++++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 6ab2318a9c8e8..dba5dcb62bef5 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); } @@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) if (ret == -ENOTBLK) ret = -EBUSY; - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 93971742613a3..b6e8e7c96251d 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_mode = inode->i_mode; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; -- GitLab From ee0e39a63b78849f8abbef268b13e4838569f646 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:33 +0800 Subject: [PATCH 1148/1405] x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h Move is_vsyscall_vaddr() into asm/vsyscall.h to make it available for copy_from_kernel_nofault_allowed() in arch/x86/mm/maccess.c. Reviewed-by: Sohil Mehta Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240202103935.3154011-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/vsyscall.h | 10 ++++++++++ arch/x86/mm/fault.c | 9 --------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb9..472f0263dbc61 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include #include +#include #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241c..d6375b3c633bc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) -- GitLab From 32019c659ecfe1d92e3bf9fcdfbb11a7c70acd58 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:34 +0800 Subject: [PATCH 1149/1405] x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault() When trying to use copy_from_kernel_nofault() to read vsyscall page through a bpf program, the following oops was reported: BUG: unable to handle page fault for address: ffffffffff600000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 3231067 P4D 3231067 PUD 3233067 PMD 3235067 PTE 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 20390 Comm: test_progs ...... 6.7.0+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:copy_from_kernel_nofault+0x6f/0x110 ...... Call Trace: ? copy_from_kernel_nofault+0x6f/0x110 bpf_probe_read_kernel+0x1d/0x50 bpf_prog_2061065e56845f08_do_probe_read+0x51/0x8d trace_call_bpf+0xc5/0x1c0 perf_call_bpf_enter.isra.0+0x69/0xb0 perf_syscall_enter+0x13e/0x200 syscall_trace_enter+0x188/0x1c0 do_syscall_64+0xb5/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ...... ---[ end trace 0000000000000000 ]--- The oops is triggered when: 1) A bpf program uses bpf_probe_read_kernel() to read from the vsyscall page and invokes copy_from_kernel_nofault() which in turn calls __get_user_asm(). 2) Because the vsyscall page address is not readable from kernel space, a page fault exception is triggered accordingly. 3) handle_page_fault() considers the vsyscall page address as a user space address instead of a kernel space address. This results in the fix-up setup by bpf not being applied and a page_fault_oops() is invoked due to SMAP. Considering handle_page_fault() has already considered the vsyscall page address as a userspace address, fix the problem by disallowing vsyscall page read for copy_from_kernel_nofault(). Originally-by: Thomas Gleixner Reported-by: syzbot+72aa0161922eba61b50e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAG48ez06TZft=ATH1qh2c5mpS5BT8UakwNkzi6nvK5_djC-4Nw@mail.gmail.com Reported-by: xingwei lee Closes: https://lore.kernel.org/bpf/CABOYnLynjBoFZOf3Z4BhaZkc5hx_kHfsjiW+UWLoB=w33LvScw@mail.gmail.com Signed-off-by: Hou Tao Reviewed-by: Sohil Mehta Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240202103935.3154011-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- arch/x86/mm/maccess.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec9..42115ac079cfe 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include #include +#include + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; + /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early -- GitLab From be66d79189ec8a1006ec6ec302bb27b160b3e6ce Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 2 Feb 2024 18:39:35 +0800 Subject: [PATCH 1150/1405] selftest/bpf: Test the read of vsyscall page under x86-64 Under x86-64, when using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read vsyscall page, the read may trigger oops, so add one test case to ensure that the problem is fixed. Beside those four bpf helpers mentioned above, testing the read of vsyscall page by using bpf_probe_read_user{_str} and bpf_copy_from_user{_task}() as well. The test case passes the address of vsyscall page to these six helpers and checks whether the returned values are expected: 1) For bpf_probe_read_kernel{_str}()/bpf_probe_read{_str}(), the expected return value is -ERANGE as shown below: bpf_probe_read_kernel_common copy_from_kernel_nofault // false, return -ERANGE copy_from_kernel_nofault_allowed 2) For bpf_probe_read_user{_str}(), the expected return value is -EFAULT as show below: bpf_probe_read_user_common copy_from_user_nofault // false, return -EFAULT __access_ok 3) For bpf_copy_from_user(), the expected return value is -EFAULT: // return -EFAULT bpf_copy_from_user copy_from_user _copy_from_user // return false access_ok 4) For bpf_copy_from_user_task(), the expected return value is -EFAULT: // return -EFAULT bpf_copy_from_user_task access_process_vm // return 0 vma_lookup() // return 0 expand_stack() The occurrence of oops depends on the availability of CPU SMAP [1] feature and there are three possible configurations of vsyscall page in the boot cmd-line: vsyscall={xonly|none|emulate}, so there are a total of six possible combinations. Under all these combinations, the test case runs successfully. [1]: https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention Acked-by: Yonghong Song Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20240202103935.3154011-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/read_vsyscall.c | 57 +++++++++++++++++++ .../selftests/bpf/progs/read_vsyscall.c | 45 +++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c new file mode 100644 index 0000000000000..3405923fe4e65 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include "test_progs.h" +#include "read_vsyscall.skel.h" + +#if defined(__x86_64__) +/* For VSYSCALL_ADDR */ +#include +#else +/* To prevent build failure on non-x86 arch */ +#define VSYSCALL_ADDR 0UL +#endif + +struct read_ret_desc { + const char *name; + int ret; +} all_read[] = { + { .name = "probe_read_kernel", .ret = -ERANGE }, + { .name = "probe_read_kernel_str", .ret = -ERANGE }, + { .name = "probe_read", .ret = -ERANGE }, + { .name = "probe_read_str", .ret = -ERANGE }, + { .name = "probe_read_user", .ret = -EFAULT }, + { .name = "probe_read_user_str", .ret = -EFAULT }, + { .name = "copy_from_user", .ret = -EFAULT }, + { .name = "copy_from_user_task", .ret = -EFAULT }, +}; + +void test_read_vsyscall(void) +{ + struct read_vsyscall *skel; + unsigned int i; + int err; + +#if !defined(__x86_64__) + test__skip(); + return; +#endif + skel = read_vsyscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load")) + return; + + skel->bss->target_pid = getpid(); + err = read_vsyscall__attach(skel); + if (!ASSERT_EQ(err, 0, "read_vsyscall attach")) + goto out; + + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE, + * but it doesn't affect the returned error codes. + */ + skel->bss->user_ptr = (void *)VSYSCALL_ADDR; + usleep(1); + + for (i = 0; i < ARRAY_SIZE(all_read); i++) + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name); +out: + read_vsyscall__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c new file mode 100644 index 0000000000000..986f96687ae15 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include +#include + +#include "bpf_misc.h" + +int target_pid = 0; +void *user_ptr = 0; +int read_ret[8]; + +char _license[] SEC("license") = "GPL"; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int do_probe_read(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr); + read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr); + read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr); + read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr); + read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr); + read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int do_copy_from_user(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr); + read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, + bpf_get_current_task_btf(), 0); + + return 0; +} -- GitLab From 4860abb91f3d7fbaf8147d54782149bb1fc45892 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 6 Feb 2024 16:34:22 -0600 Subject: [PATCH 1151/1405] smb: Fix regression in writes when non-standard maximum write size negotiated The conversion to netfs in the 6.3 kernel caused a regression when maximum write size is set by the server to an unexpected value which is not a multiple of 4096 (similarly if the user overrides the maximum write size by setting mount parm "wsize", but sets it to a value that is not a multiple of 4096). When negotiated write size is not a multiple of 4096 the netfs code can skip the end of the final page when doing large sequential writes, causing data corruption. This section of code is being rewritten/removed due to a large netfs change, but until that point (ie for the 6.3 kernel until now) we can not support non-standard maximum write sizes. Add a warning if a user specifies a wsize on mount that is not a multiple of 4096 (and round down), also add a change where we round down the maximum write size if the server negotiates a value that is not a multiple of 4096 (we also have to check to make sure that we do not round it down to zero). Reported-by: R. Diez" Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Suggested-by: Ronnie Sahlberg Acked-by: Ronnie Sahlberg Tested-by: Matthew Ruffell Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org # v6.3+ Cc: David Howells Signed-off-by: Steve French --- fs/smb/client/connect.c | 14 ++++++++++++-- fs/smb/client/fs_context.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d03253f8f1455..ac9595504f4b1 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3444,8 +3444,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * the user on mount */ if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) - cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx); + (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { + cifs_sb->ctx->wsize = + round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + /* + * in the very unlikely event that the server sent a max write size under PAGE_SIZE, + * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 + */ + if (cifs_sb->ctx->wsize == 0) { + cifs_sb->ctx->wsize = PAGE_SIZE; + cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); + } + } if ((cifs_sb->ctx->rsize == 0) || (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index aec8dbd1f9dbd..4b2f5aa2ea0e1 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1111,6 +1111,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_wsize: ctx->wsize = result.uint_32; ctx->got_wsize = true; + if (ctx->wsize % PAGE_SIZE != 0) { + ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); + if (ctx->wsize == 0) { + ctx->wsize = PAGE_SIZE; + cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); + } else { + cifs_dbg(VFS, + "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", + ctx->wsize, PAGE_SIZE); + } + } break; case Opt_acregmax: ctx->acregmax = HZ * result.uint_32; -- GitLab From b4ea9b6a18ebf7f9f3a7a60f82e925186978cfcf Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 14 Feb 2024 17:32:40 +0100 Subject: [PATCH 1152/1405] net/iucv: fix the allocation size of iucv_path_table array iucv_path_table is a dynamically allocated array of pointers to struct iucv_path items. Yet, its size is calculated as if it was an array of struct iucv_path items. Signed-off-by: Alexander Gordeev Reviewed-by: Alexandra Winter Signed-off-by: David S. Miller --- net/iucv/iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 6334f64f04d5f..b0b3e9c5af44f 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID"; static LIST_HEAD(iucv_handler_list); /* - * iucv_path_table: an array of iucv_path structures. + * iucv_path_table: array of pointers to iucv_path structures. */ static struct iucv_path **iucv_path_table; static unsigned long iucv_max_pathid; @@ -544,7 +544,7 @@ static int iucv_enable(void) cpus_read_lock(); rc = -ENOMEM; - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); if (!iucv_path_table) goto out; -- GitLab From dc489f86257cab5056e747344f17a164f63bff4b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: [PATCH 1153/1405] net: bridge: switchdev: Skip MDB replays of deferred events on offload Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/net/switchdev.h | 3 ++ net/bridge/br_switchdev.c | 74 ++++++++++++++++++++++++--------------- net/switchdev/switchdev.c | 73 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 28 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index a43062d4c734b..8346b0d29542c 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -308,6 +308,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index ee84e783e1dff..6a7cb01f121c7 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) - return -ENOMEM; + br_switchdev_mdb_populate(&mdb, mp); - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) + return -ENOMEM; + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; + + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. */ - rcu_read_lock(); + spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5b045284849e0..c9189a970eec3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include #include +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); -- GitLab From f7a70d650b0b6b0134ccba763d672c8439d9f09b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 14 Feb 2024 22:40:04 +0100 Subject: [PATCH 1154/1405] net: bridge: switchdev: Ensure deferred event delivery on unoffload When unoffloading a device, it is important to ensure that all relevant deferred events are delivered to it before it disassociates itself from the bridge. Before this change, this was true for the normal case when a device maps 1:1 to a net_bridge_port, i.e. br0 / swp0 When swp0 leaves br0, the call to switchdev_deferred_process() in del_nbp() makes sure to process any outstanding events while the device is still associated with the bridge. In the case when the association is indirect though, i.e. when the device is attached to the bridge via an intermediate device, like a LAG... br0 / lag0 / swp0 ...then detaching swp0 from lag0 does not cause any net_bridge_port to be deleted, so there was no guarantee that all events had been processed before the device disassociated itself from the bridge. Fix this by always synchronously processing all deferred events before signaling completion of unoffloading back to the driver. Fixes: 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/bridge/br_switchdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 6a7cb01f121c7..7b41ee8740cbb 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -804,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a -- GitLab From 66b60b0c8c4a163b022a9f0ad6769b0fd3dc662f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Feb 2024 11:13:08 -0800 Subject: [PATCH 1155/1405] dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished(). syzkaller reported a warning [0] in inet_csk_destroy_sock() with no repro. WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); However, the syzkaller's log hinted that connect() failed just before the warning due to FAULT_INJECTION. [1] When connect() is called for an unbound socket, we search for an available ephemeral port. If a bhash bucket exists for the port, we call __inet_check_established() or __inet6_check_established() to check if the bucket is reusable. If reusable, we add the socket into ehash and set inet_sk(sk)->inet_num. Later, we look up the corresponding bhash2 bucket and try to allocate it if it does not exist. Although it rarely occurs in real use, if the allocation fails, we must revert the changes by check_established(). Otherwise, an unconnected socket could illegally occupy an ehash entry. Note that we do not put tw back into ehash because sk might have already responded to a packet for tw and it would be better to free tw earlier under such memory presure. [0]: WARNING: CPU: 0 PID: 350830 at net/ipv4/inet_connection_sock.c:1193 inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Code: 41 5c 41 5d 41 5e e9 2d 4a 3d fd e8 28 4a 3d fd 48 89 ef e8 f0 cd 7d ff 5b 5d 41 5c 41 5d 41 5e e9 13 4a 3d fd e8 0e 4a 3d fd <0f> 0b e9 61 fe ff ff e8 02 4a 3d fd 4c 89 e7 be 03 00 00 00 e8 05 RSP: 0018:ffffc9000b21fd38 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000009e78 RCX: ffffffff840bae40 RDX: ffff88806e46c600 RSI: ffffffff840bb012 RDI: ffff88811755cca8 RBP: ffff88811755c880 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000009e78 R11: 0000000000000000 R12: ffff88811755c8e0 R13: ffff88811755c892 R14: ffff88811755c918 R15: 0000000000000000 FS: 00007f03e5243800(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f21000 CR3: 0000000112ffe001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: ? inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) dccp_close (net/dccp/proto.c:1078) inet_release (net/ipv4/af_inet.c:434) __sock_release (net/socket.c:660) sock_close (net/socket.c:1423) __fput (fs/file_table.c:377) __fput_sync (fs/file_table.c:462) __x64_sys_close (fs/open.c:1557 fs/open.c:1539 fs/open.c:1539) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e53852bb Code: 03 00 00 00 0f 05 48 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 43 c9 f5 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 c9 f5 ff 8b 44 RSP: 002b:00000000005dfba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f03e53852bb RDX: 0000000000000002 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000167c R10: 0000000008a79680 R11: 0000000000000293 R12: 00007f03e4e43000 R13: 00007f03e4e43170 R14: 00007f03e4e43178 R15: 00007f03e4e43170 [1]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 350833 Comm: syz-executor.1 Not tainted 6.7.0-12272-g2121c43f88f5 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3748) kmem_cache_alloc (mm/slub.c:3763 mm/slub.c:3842 mm/slub.c:3867) inet_bind2_bucket_create (net/ipv4/inet_hashtables.c:135) __inet_hash_connect (net/ipv4/inet_hashtables.c:1100) dccp_v4_connect (net/dccp/ipv4.c:116) __inet_stream_connect (net/ipv4/af_inet.c:676) inet_stream_connect (net/ipv4/af_inet.c:747) __sys_connect_file (net/socket.c:2048 (discriminator 2)) __sys_connect (net/socket.c:2065) __x64_sys_connect (net/socket.c:2072) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e5284e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f03e4641cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f03e5284e5d RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f03e52e5530 R15: 0000000000000000 Reported-by: syzkaller Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 93e9193df5446..308ff34002ea6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1130,10 +1130,33 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } -- GitLab From a9f80df4f51440303d063b55bb98720857693821 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 14 Feb 2024 23:00:50 -0800 Subject: [PATCH 1156/1405] net: ethernet: adi: requires PHYLIB support This driver uses functions that are supplied by the Kconfig symbol PHYLIB, so select it to ensure that they are built as needed. When CONFIG_ADIN1110=y and CONFIG_PHYLIB=m, there are multiple build (linker) errors that are resolved by this Kconfig change: ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_net_open': drivers/net/ethernet/adi/adin1110.c:933: undefined reference to `phy_start' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_probe_netdevs': drivers/net/ethernet/adi/adin1110.c:1603: undefined reference to `get_phy_device' ld: drivers/net/ethernet/adi/adin1110.c:1609: undefined reference to `phy_connect' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_disconnect_phy': drivers/net/ethernet/adi/adin1110.c:1226: undefined reference to `phy_disconnect' ld: drivers/net/ethernet/adi/adin1110.o: in function `devm_mdiobus_alloc': include/linux/phy.h:455: undefined reference to `devm_mdiobus_alloc_size' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_register_mdiobus': drivers/net/ethernet/adi/adin1110.c:529: undefined reference to `__devm_mdiobus_register' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_net_stop': drivers/net/ethernet/adi/adin1110.c:958: undefined reference to `phy_stop' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_disconnect_phy': drivers/net/ethernet/adi/adin1110.c:1226: undefined reference to `phy_disconnect' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_adjust_link': drivers/net/ethernet/adi/adin1110.c:1077: undefined reference to `phy_print_status' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_ioctl': drivers/net/ethernet/adi/adin1110.c:790: undefined reference to `phy_do_ioctl' ld: drivers/net/ethernet/adi/adin1110.o:(.rodata+0xf60): undefined reference to `phy_ethtool_get_link_ksettings' ld: drivers/net/ethernet/adi/adin1110.o:(.rodata+0xf68): undefined reference to `phy_ethtool_set_link_ksettings' Fixes: bc93e19d088b ("net: ethernet: adi: Add ADIN1110 support") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402070626.eZsfVHG5-lkp@intel.com/ Cc: Lennart Franzen Cc: Alexandru Tachici Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Reviewed-by: Nuno Sa Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index da3bdd3025022..c91b4dcef4ec2 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -7,6 +7,7 @@ config NET_VENDOR_ADI bool "Analog Devices devices" default y depends on SPI + select PHYLIB help If you have a network (Ethernet) card belonging to this class, say Y. -- GitLab From 52f671db18823089a02f07efc04efdb2272ddc17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Feb 2024 06:33:45 -0800 Subject: [PATCH 1157/1405] net/sched: act_mirred: use the backlog for mirred ingress The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress") hangs our testing VMs every 10 or so runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by lockdep. The problem as previously described by Davide (see Link) is that if we reverse flow of traffic with the redirect (egress -> ingress) we may reach the same socket which generated the packet. And we may still be holding its socket lock. The common solution to such deadlocks is to put the packet in the Rx backlog, rather than run the Rx path inline. Do that for all egress -> ingress reversals, not just once we started to nest mirred calls. In the past there was a concern that the backlog indirection will lead to loss of error reporting / less accurate stats. But the current workaround does not seem to address the issue. Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions") Cc: Marcelo Ricardo Leitner Suggested-by: Davide Caratti Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/ Signed-off-by: Jakub Kicinski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 14 +++++--------- .../testing/selftests/net/forwarding/tc_actions.sh | 3 --- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a1a9e40f2370..291d47c9eb69d 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -232,18 +232,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return err; } -static bool is_mirred_nested(void) -{ - return unlikely(__this_cpu_read(mirred_nest_level) > 1); -} - -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +static int +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (is_mirred_nested()) + else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); @@ -319,9 +315,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) { diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index b0f5e55d2d0b2..5896296365022 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test() check_err $? "didn't mirred redirect ICMP" tc_check_packets "dev $h1 ingress" 102 10 check_err $? "didn't drop mirred ICMP" - local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) - test ${overlimits} = 10 - check_err $? "wrong overlimits, expected 10 got ${overlimits}" tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower -- GitLab From 166c2c8a6a4dc2e4ceba9e10cfe81c3e469e3210 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 15 Feb 2024 06:33:46 -0800 Subject: [PATCH 1158/1405] net/sched: act_mirred: don't override retval if we already lost the skb If we're redirecting the skb, and haven't called tcf_mirred_forward(), yet, we need to tell the core to drop the skb by setting the retcode to SHOT. If we have called tcf_mirred_forward(), however, the skb is out of our hands and returning SHOT will lead to UaF. Move the retval override to the error path which actually need it. Reviewed-by: Michal Swiatkowski Fixes: e5cf1baf92cb ("act_mirred: use TC_ACT_REINSERT when possible") Signed-off-by: Jakub Kicinski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 291d47c9eb69d..6faa7d00da097 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -266,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); - err = -ENODEV; - goto out; + goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; @@ -279,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); - if (!skb_to_send) { - err = -ENOMEM; - goto out; - } + if (!skb_to_send) + goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); @@ -319,15 +316,16 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } - - if (err) { -out: + if (err) tcf_action_inc_overlimit_qstats(&m->common); - if (is_redirect) - retval = TC_ACT_SHOT; - } return retval; + +err_cant_do: + if (is_redirect) + retval = TC_ACT_SHOT; + tcf_action_inc_overlimit_qstats(&m->common); + return retval; } static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, -- GitLab From 02f76a9cd4494719600baf1ab278930df39431ab Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 16 Feb 2024 15:30:48 +0530 Subject: [PATCH 1159/1405] drm/buddy: Modify duplicate list_splice_tail call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the duplicate list_splice_tail call when the total_allocated < size condition is true. Cc: # 6.7+ Fixes: 8746c6c9dfa3 ("drm/buddy: Fix alloc_range() error handling code") Reported-by: Bert Karwatzki Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240216100048.4101-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index c1a99bf4dffd1..c4222b886db7c 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -538,13 +538,13 @@ static int __alloc_range(struct drm_buddy *mm, list_add(&block->left->tmp_link, dfs); } while (1); - list_splice_tail(&allocated, blocks); - if (total_allocated < size) { err = -ENOSPC; goto err_free; } + list_splice_tail(&allocated, blocks); + return 0; err_undo: -- GitLab From 0affdba22aca5573f9d989bcb1d71d32a6a03efe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Feb 2024 10:57:37 +0100 Subject: [PATCH 1160/1405] nouveau: fix function cast warnings clang-16 warns about casting between incompatible function types: drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c:161:10: error: cast from 'void (*)(const struct firmware *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 161 | .fini = (void(*)(void *))release_firmware, This one was done to use the generic shadow_fw_release() function as a callback for struct nvbios_source. Change it to use the same prototype as the other five instances, with a trivial helper function that actually calls release_firmware. Fixes: 70c0f263cc2e ("drm/nouveau/bios: pull in basic vbios subdev, more to come later") Signed-off-by: Arnd Bergmann Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240213095753.455062-1-arnd@kernel.org --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 19188683c8fca..8c2bf1c16f2a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -154,11 +154,17 @@ shadow_fw_init(struct nvkm_bios *bios, const char *name) return (void *)fw; } +static void +shadow_fw_release(void *fw) +{ + release_firmware(fw); +} + static const struct nvbios_source shadow_fw = { .name = "firmware", .init = shadow_fw_init, - .fini = (void(*)(void *))release_firmware, + .fini = shadow_fw_release, .read = shadow_fw_read, .rw = false, }; -- GitLab From 65323796debe49a1922ba507020f7530a4b3f9af Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 13 Feb 2024 21:09:57 +0300 Subject: [PATCH 1161/1405] drm/nouveau/mmu/r535: uninitialized variable in r535_bar_new_() If gf100_bar_new_() fails then "bar" is not initialized. Fixes: 5bf0257136a2 ("drm/nouveau/mmu/r535: initial support") Signed-off-by: Dan Carpenter Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/dab21df7-4d90-4479-97d8-97e5d228c714@moroto.mountain --- drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c index 4135690326f44..3a30bea30e366 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c @@ -168,12 +168,11 @@ r535_bar_new_(const struct nvkm_bar_func *hw, struct nvkm_device *device, rm->flush = r535_bar_flush; ret = gf100_bar_new_(rm, device, type, inst, &bar); - *pbar = bar; if (ret) { - if (!bar) - kfree(rm); + kfree(rm); return ret; } + *pbar = bar; bar->flushBAR2PhysMode = ioremap(device->func->resource_addr(device, 3), PAGE_SIZE); if (!bar->flushBAR2PhysMode) -- GitLab From 250f5402e636a5cec9e0e95df252c3d54307210f Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Sun, 11 Feb 2024 23:43:14 +0100 Subject: [PATCH 1162/1405] parisc/ftrace: add missing CONFIG_DYNAMIC_FTRACE check Fixes a bug revealed by -Wmissing-prototypes when CONFIG_FUNCTION_GRAPH_TRACER is enabled but not CONFIG_DYNAMIC_FTRACE: arch/parisc/kernel/ftrace.c:82:5: error: no previous prototype for 'ftrace_enable_ftrace_graph_caller' [-Werror=missing-prototypes] 82 | int ftrace_enable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/parisc/kernel/ftrace.c:88:5: error: no previous prototype for 'ftrace_disable_ftrace_graph_caller' [-Werror=missing-prototypes] 88 | int ftrace_disable_ftrace_graph_caller(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Max Kellermann Signed-off-by: Helge Deller --- arch/parisc/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index d1defb9ede70c..621a4b386ae4f 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -78,7 +78,7 @@ asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_FUNCTION_GRAPH_TRACER) int ftrace_enable_ftrace_graph_caller(void) { static_key_enable(&ftrace_graph_enable.key); -- GitLab From f945a404ed8a6eeb9907ef122a8382df56c2a714 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 12 Feb 2024 00:09:45 +0100 Subject: [PATCH 1163/1405] parisc/kprobes: always include asm-generic/kprobes.h The NOKPROBE_SYMBOL macro (and others) were moved to asm-generic/kprobes.h in 2017 by commit 7d134b2ce639 ("kprobes: move kprobe declarations to asm-generic/kprobes.h"), and this new header was included by asm/kprobes.h unconditionally on all architectures. When kprobe support was added to parisc in 2017 by commit 8858ac8e9e9b1 ("parisc: Implement kprobes"), that header was only included when CONFIG_KPROBES was enabled. This can lead to build failures when NOKPROBE_SYMBOL is used, but CONFIG_KPROBES is disabled. This mistake however was never actually noticed because linux/kprobes.h also includes asm-generic/kprobes.h (though I do not understand why that is, because it also includes asm/kprobes.h). To prevent eventual build failures, I suggest to always include asm-generic/kprobes.h on parisc, just like all the other architectures do. This way, including asm/kprobes.h suffices, and nobody (outside of arch/) ever needs to explicitly include asm-generic/kprobes.h. Signed-off-by: Max Kellermann Signed-off-by: Helge Deller --- arch/parisc/include/asm/kprobes.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index 0a175ac876980..0f42f5c8e3b66 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -10,9 +10,10 @@ #ifndef _PARISC_KPROBES_H #define _PARISC_KPROBES_H +#include + #ifdef CONFIG_KPROBES -#include #include #include #include -- GitLab From 172c0cf519fb52860157c57067f1a58cfc0aa861 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Fri, 16 Feb 2024 12:29:26 +0530 Subject: [PATCH 1164/1405] MAINTAINERS: Add Siddharth Vadapalli as PCI TI DRA7XX/J721E reviewer Since I have been contributing to the driver for a while and wish to help with the review process, add myself as a reviewer. Link: https://lore.kernel.org/r/20240216065926.473805-1-s-vadapalli@ti.com Signed-off-by: Siddharth Vadapalli Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a692..a80c590d00359 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16832,6 +16832,7 @@ F: drivers/pci/controller/dwc/*designware* PCI DRIVER FOR TI DRA7XX/J721E M: Vignesh Raghavendra +R: Siddharth Vadapalli L: linux-omap@vger.kernel.org L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -- GitLab From 5429c8de56f6b2bd8f537df3a1e04e67b9c04282 Mon Sep 17 00:00:00 2001 From: Greg Joyce Date: Fri, 16 Feb 2024 15:04:17 -0600 Subject: [PATCH 1165/1405] block: sed-opal: handle empty atoms when parsing response The SED Opal response parsing function response_parse() does not handle the case of an empty atom in the response. This causes the entry count to be too high and the response fails to be parsed. Recognizing, but ignoring, empty atoms allows response handling to succeed. Signed-off-by: Greg Joyce Link: https://lore.kernel.org/r/20240216210417.3526064-2-gjoyce@linux.ibm.com Signed-off-by: Jens Axboe --- block/opal_proto.h | 1 + block/sed-opal.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/block/opal_proto.h b/block/opal_proto.h index dec7ce3a3edb7..d247a457bf6e3 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -71,6 +71,7 @@ enum opal_response_token { #define SHORT_ATOM_BYTE 0xBF #define MEDIUM_ATOM_BYTE 0xDF #define LONG_ATOM_BYTE 0xE3 +#define EMPTY_ATOM_BYTE 0xFF #define OPAL_INVAL_PARAM 12 #define OPAL_MANUFACTURED_INACTIVE 0x08 diff --git a/block/sed-opal.c b/block/sed-opal.c index 3d9e9cd250bd5..fa4dba5d85319 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -1056,16 +1056,20 @@ static int response_parse(const u8 *buf, size_t length, token_length = response_parse_medium(iter, pos); else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ token_length = response_parse_long(iter, pos); + else if (pos[0] == EMPTY_ATOM_BYTE) /* empty atom */ + token_length = 1; else /* TOKEN */ token_length = response_parse_token(iter, pos); if (token_length < 0) return token_length; + if (pos[0] != EMPTY_ATOM_BYTE) + num_entries++; + pos += token_length; total -= token_length; iter++; - num_entries++; } resp->num = num_entries; -- GitLab From 9b99c17f7510bed2adbe17751fb8abddba5620bc Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:50 -0800 Subject: [PATCH 1166/1405] x86/numa: Fix the address overlap check in numa_fill_memblks() numa_fill_memblks() fills in the gaps in numa_meminfo memblks over a physical address range. To do so, it first creates a list of existing memblks that overlap that address range. The issue is that it is off by one when comparing to the end of the address range, so memblks that do not overlap are selected. The impact of selecting a memblk that does not actually overlap is that an existing memblk may be filled when the expected action is to do nothing and return NUMA_NO_MEMBLK to the caller. The caller can then add a new NUMA node and memblk. Replace the broken open-coded search for address overlap with the memblock helper memblock_addrs_overlap(). Update the kernel doc and in code comments. Suggested by: "Huang, Ying" Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Mike Rapoport (IBM) Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/10a3e6109c34c21a8dd4c513cf63df63481a2b07.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- arch/x86/mm/numa.c | 19 +++++++------------ include/linux/memblock.h | 2 ++ mm/memblock.c | 5 +++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index adc497b93f037..8ada9bbfad583 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; * @start: address to begin fill * @end: address to end fill * - * Find and extend numa_meminfo memblks to cover the @start-@end - * physical address range, such that the first memblk includes - * @start, the last memblk includes @end, and any gaps in between - * are filled. + * Find and extend numa_meminfo memblks to cover the physical + * address range @start-@end * * RETURNS: * 0 : Success - * NUMA_NO_MEMBLK : No memblk exists in @start-@end range + * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end */ int __init numa_fill_memblks(u64 start, u64 end) @@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end) /* * Create a list of pointers to numa_meminfo memblks that - * overlap start, end. Exclude (start == bi->end) since - * end addresses in both a CFMWS range and a memblk range - * are exclusive. - * - * This list of pointers is used to make in-place changes - * that fill out the numa_meminfo memblks. + * overlap start, end. The list is used to make in-place + * changes that fill out the numa_meminfo memblks. */ for (int i = 0; i < mi->nr_blks; i++) { struct numa_memblk *bi = &mi->blk[i]; - if (start < bi->end && end >= bi->start) { + if (memblock_addrs_overlap(start, end - start, bi->start, + bi->end - bi->start)) { blk[count] = &mi->blk[i]; count++; } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b695f9e946dab..e2082240586d0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -121,6 +121,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif void memblock_trim_memory(phys_addr_t align); +unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, + phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); bool memblock_validate_numa_coverage(unsigned long threshold_bytes); diff --git a/mm/memblock.c b/mm/memblock.c index 4dcb2ee35eca8..964eb72db539e 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) /* * Address comparison utilities */ -static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) +unsigned long __init_memblock +memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, + phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -- GitLab From b626070ffc14acca5b87a2aa5f581db98617584c Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Fri, 12 Jan 2024 12:09:51 -0800 Subject: [PATCH 1167/1405] x86/numa: Fix the sort compare func used in numa_fill_memblks() The compare function used to sort memblks into starting address order fails when the result of its u64 address subtraction gets truncated to an int upon return. The impact of the bad sort is that memblks will be filled out incorrectly. Depending on the set of memblks, a user may see no errors at all but still have a bad fill, or see messages reporting a node overlap that leads to numa init failure: [] node 0 [mem: ] overlaps with node 1 [mem: ] [] No NUMA configuration found Replace with a comparison that can only result in: 1, 0, -1. Fixes: 8f012db27c95 ("x86/numa: Introduce numa_fill_memblks()") Signed-off-by: Alison Schofield Acked-by: Dave Hansen Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/99dcb3ae87e04995e9f293f6158dc8fa0749a487.1705085543.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8ada9bbfad583..65e9a6e391c04 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b) const struct numa_memblk *ma = *(const struct numa_memblk **)a; const struct numa_memblk *mb = *(const struct numa_memblk **)b; - return ma->start - mb->start; + return (ma->start > mb->start) - (ma->start < mb->start); } static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata; -- GitLab From 453a7fde8031a5192ed2f9646ad048c1a5e930dc Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 31 Jan 2024 13:59:30 -0800 Subject: [PATCH 1168/1405] cxl/region: Handle endpoint decoders in cxl_region_find_decoder() In preparation for adding a new caller of cxl_region_find_decoders() teach it to find a decoder from a cxl_endpoint_decoder structure. Combining switch and endpoint decoder lookup in one function prevents code duplication in call sites. Update the existing caller. Signed-off-by: Alison Schofield Tested-by: Wonjae Lee Link: https://lore.kernel.org/r/79ae6d72978ef9f3ceec9722e1cb793820553c8e.1706736863.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ce0e2d82bb2b4..f1a9d1798d215 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -730,12 +730,17 @@ static int match_auto_decoder(struct device *dev, void *data) return 0; } -static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port, - struct cxl_region *cxlr) +static struct cxl_decoder * +cxl_region_find_decoder(struct cxl_port *port, + struct cxl_endpoint_decoder *cxled, + struct cxl_region *cxlr) { struct device *dev; int id = 0; + if (port == cxled_to_port(cxled)) + return &cxled->cxld; + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) dev = device_find_child(&port->dev, &cxlr->params, match_auto_decoder); @@ -853,10 +858,7 @@ static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, { struct cxl_decoder *cxld; - if (port == cxled_to_port(cxled)) - cxld = &cxled->cxld; - else - cxld = cxl_region_find_decoder(port, cxlr); + cxld = cxl_region_find_decoder(port, cxled, cxlr); if (!cxld) { dev_dbg(&cxlr->dev, "%s: no decoder available\n", dev_name(&port->dev)); -- GitLab From cb66b1d60c283bb340a2fc19deff7de8acea74b1 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 31 Jan 2024 13:59:31 -0800 Subject: [PATCH 1169/1405] cxl/region: Allow out of order assembly of autodiscovered regions Autodiscovered regions can fail to assemble if they are not discovered in HPA decode order. The user will see failure messages like: [] cxl region0: endpoint5: HPA order violation region1 [] cxl region0: endpoint5: failed to allocate region reference The check that is causing the failure helps the CXL driver enforce a CXL spec mandate that decoders be committed in HPA order. The check is needless for autodiscovered regions since their decoders are already programmed. Trying to enforce order in the assembly of these regions is useless because they are assembled once all their member endpoints arrive, and there is no guarantee on the order in which endpoints are discovered during probe. Keep the existing check, but for autodiscovered regions, allow the out of order assembly after a sanity check that the lesser numbered decoder has the lesser HPA starting address. Signed-off-by: Alison Schofield Tested-by: Wonjae Lee Link: https://lore.kernel.org/r/3dec69ee97524ab229a20c6739272c3000b18408.1706736863.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f1a9d1798d215..4c7fd2d5cccb2 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -758,8 +758,31 @@ cxl_region_find_decoder(struct cxl_port *port, return to_cxl_decoder(dev); } -static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, - struct cxl_region *cxlr) +static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter, + struct cxl_decoder *cxld) +{ + struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter); + struct cxl_decoder *cxld_iter = rr->decoder; + + /* + * Allow the out of order assembly of auto-discovered regions. + * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders + * in HPA order. Confirm that the decoder with the lesser HPA + * starting address has the lesser id. + */ + dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n", + dev_name(&cxld->dev), cxld->id, + dev_name(&cxld_iter->dev), cxld_iter->id); + + if (cxld_iter->id > cxld->id) + return true; + + return false; +} + +static struct cxl_region_ref * +alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) { struct cxl_region_params *p = &cxlr->params; struct cxl_region_ref *cxl_rr, *iter; @@ -769,16 +792,21 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, xa_for_each(&port->regions, index, iter) { struct cxl_region_params *ip = &iter->region->params; - if (!ip->res) + if (!ip->res || ip->res->start < p->res->start) continue; - if (ip->res->start > p->res->start) { - dev_dbg(&cxlr->dev, - "%s: HPA order violation %s:%pr vs %pr\n", - dev_name(&port->dev), - dev_name(&iter->region->dev), ip->res, p->res); - return ERR_PTR(-EBUSY); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { + struct cxl_decoder *cxld; + + cxld = cxl_region_find_decoder(port, cxled, cxlr); + if (auto_order_ok(port, iter->region, cxld)) + continue; } + dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n", + dev_name(&port->dev), + dev_name(&iter->region->dev), ip->res, p->res); + + return ERR_PTR(-EBUSY); } cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL); @@ -955,7 +983,7 @@ static int cxl_port_attach_region(struct cxl_port *port, nr_targets_inc = true; } } else { - cxl_rr = alloc_region_ref(port, cxlr); + cxl_rr = alloc_region_ref(port, cxlr, cxled); if (IS_ERR(cxl_rr)) { dev_dbg(&cxlr->dev, "%s: failed to allocate region reference\n", -- GitLab From 00413c15068276f6e5a50215849a8d4bd812b443 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 6 Feb 2024 12:03:37 -0700 Subject: [PATCH 1170/1405] cxl: Change 'struct cxl_memdev_state' *_perf_list to single 'struct cxl_dpa_perf' In order to address the issue with being able to expose qos_class sysfs attributes under 'ram' and 'pmem' sub-directories, the attributes must be defined as static attributes rather than under driver->dev_groups. To avoid implementing locking for accessing the 'struct cxl_dpa_perf` lists, convert the list to a single 'struct cxl_dpa_perf' entry in preparation to move the attributes to statically defined. While theoretically a partition may have multiple qos_class via CDAT, this has not been encountered with testing on available hardware. The code is simplified for now to not support the complex case until a use case is needed to support that. Link: https://lore.kernel.org/linux-cxl/65b200ba228f_2d43c29468@dwillia2-mobl3.amr.corp.intel.com.notmuch/ Suggested-by: Dan Williams Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20240206190431.1810289-2-dave.jiang@intel.com Reviewed-by: Jonathan Cameron Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 82 ++++++++++++----------------------------- drivers/cxl/core/mbox.c | 4 +- drivers/cxl/cxlmem.h | 10 ++--- drivers/cxl/mem.c | 28 ++------------ 4 files changed, 34 insertions(+), 90 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 6fe11546889fa..d66acc917dac3 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -210,19 +210,12 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, return 0; } -static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, - struct list_head *list) +static void update_perf_entry(struct device *dev, struct dsmas_entry *dent, + struct cxl_dpa_perf *dpa_perf) { - struct cxl_dpa_perf *dpa_perf; - - dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL); - if (!dpa_perf) - return; - dpa_perf->dpa_range = dent->dpa_range; dpa_perf->coord = dent->coord; dpa_perf->qos_class = dent->qos_class; - list_add_tail(&dpa_perf->list, list); dev_dbg(dev, "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n", dent->dpa_range.start, dpa_perf->qos_class, @@ -230,20 +223,6 @@ static void add_perf_entry(struct device *dev, struct dsmas_entry *dent, dent->coord.read_latency, dent->coord.write_latency); } -static void free_perf_ents(void *data) -{ - struct cxl_memdev_state *mds = data; - struct cxl_dpa_perf *dpa_perf, *n; - LIST_HEAD(discard); - - list_splice_tail_init(&mds->ram_perf_list, &discard); - list_splice_tail_init(&mds->pmem_perf_list, &discard); - list_for_each_entry_safe(dpa_perf, n, &discard, list) { - list_del(&dpa_perf->list); - kfree(dpa_perf); - } -} - static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, struct xarray *dsmas_xa) { @@ -263,16 +242,14 @@ static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds, xa_for_each(dsmas_xa, index, dent) { if (resource_size(&cxlds->ram_res) && range_contains(&ram_range, &dent->dpa_range)) - add_perf_entry(dev, dent, &mds->ram_perf_list); + update_perf_entry(dev, dent, &mds->ram_perf); else if (resource_size(&cxlds->pmem_res) && range_contains(&pmem_range, &dent->dpa_range)) - add_perf_entry(dev, dent, &mds->pmem_perf_list); + update_perf_entry(dev, dent, &mds->pmem_perf); else dev_dbg(dev, "no partition for dsmas dpa: %#llx\n", dent->dpa_range.start); } - - devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds); } static int match_cxlrd_qos_class(struct device *dev, void *data) @@ -293,24 +270,24 @@ static int match_cxlrd_qos_class(struct device *dev, void *data) return 0; } -static void cxl_qos_match(struct cxl_port *root_port, - struct list_head *work_list, - struct list_head *discard_list) +static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf) { - struct cxl_dpa_perf *dpa_perf, *n; + *dpa_perf = (struct cxl_dpa_perf) { + .qos_class = CXL_QOS_CLASS_INVALID, + }; +} - list_for_each_entry_safe(dpa_perf, n, work_list, list) { - int rc; +static bool cxl_qos_match(struct cxl_port *root_port, + struct cxl_dpa_perf *dpa_perf) +{ + if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) + return false; - if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID) - return; + if (!device_for_each_child(&root_port->dev, &dpa_perf->qos_class, + match_cxlrd_qos_class)) + return false; - rc = device_for_each_child(&root_port->dev, - (void *)&dpa_perf->qos_class, - match_cxlrd_qos_class); - if (!rc) - list_move_tail(&dpa_perf->list, discard_list); - } + return true; } static int match_cxlrd_hb(struct device *dev, void *data) @@ -334,23 +311,10 @@ static int match_cxlrd_hb(struct device *dev, void *data) return 0; } -static void discard_dpa_perf(struct list_head *list) -{ - struct cxl_dpa_perf *dpa_perf, *n; - - list_for_each_entry_safe(dpa_perf, n, list, list) { - list_del(&dpa_perf->list); - kfree(dpa_perf); - } -} -DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T)) - static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - LIST_HEAD(__discard); - struct list_head *discard __free(dpa_perf) = &__discard; struct cxl_port *root_port; int rc; @@ -363,16 +327,18 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) root_port = &cxl_root->port; /* Check that the QTG IDs are all sane between end device and root decoders */ - cxl_qos_match(root_port, &mds->ram_perf_list, discard); - cxl_qos_match(root_port, &mds->pmem_perf_list, discard); + if (!cxl_qos_match(root_port, &mds->ram_perf)) + reset_dpa_perf(&mds->ram_perf); + if (!cxl_qos_match(root_port, &mds->pmem_perf)) + reset_dpa_perf(&mds->pmem_perf); /* Check to make sure that the device's host bridge is under a root decoder */ rc = device_for_each_child(&root_port->dev, (void *)cxlmd->endpoint->host_bridge, match_cxlrd_hb); if (!rc) { - list_splice_tail_init(&mds->ram_perf_list, discard); - list_splice_tail_init(&mds->pmem_perf_list, discard); + reset_dpa_perf(&mds->ram_perf); + reset_dpa_perf(&mds->pmem_perf); } return rc; diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 27166a4117057..9adda4795eb78 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1391,8 +1391,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mds->cxlds.reg_map.host = dev; mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; - INIT_LIST_HEAD(&mds->ram_perf_list); - INIT_LIST_HEAD(&mds->pmem_perf_list); + mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID; + mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID; return mds; } diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5303d6942b880..20fb3b35e89e0 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -395,13 +395,11 @@ enum cxl_devtype { /** * struct cxl_dpa_perf - DPA performance property entry - * @list - list entry * @dpa_range - range for DPA address * @coord - QoS performance data (i.e. latency, bandwidth) * @qos_class - QoS Class cookies */ struct cxl_dpa_perf { - struct list_head list; struct range dpa_range; struct access_coordinate coord; int qos_class; @@ -471,8 +469,8 @@ struct cxl_dev_state { * @security: security driver state info * @fw: firmware upload / activation state * @mbox_send: @dev specific transport for transmitting mailbox commands - * @ram_perf_list: performance data entries matched to RAM - * @pmem_perf_list: performance data entries matched to PMEM + * @ram_perf: performance data entry matched to RAM partition + * @pmem_perf: performance data entry matched to PMEM partition * * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -494,8 +492,8 @@ struct cxl_memdev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; - struct list_head ram_perf_list; - struct list_head pmem_perf_list; + struct cxl_dpa_perf ram_perf; + struct cxl_dpa_perf pmem_perf; struct cxl_event_state event; struct cxl_poison_state poison; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c5c9d8e0d88d6..547f5a145fc54 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -221,18 +221,8 @@ static ssize_t ram_qos_class_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct cxl_dpa_perf *dpa_perf; - if (!dev->driver) - return -ENOENT; - - if (list_empty(&mds->ram_perf_list)) - return -ENOENT; - - dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf, - list); - - return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); + return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); } static struct device_attribute dev_attr_ram_qos_class = @@ -244,18 +234,8 @@ static ssize_t pmem_qos_class_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct cxl_dpa_perf *dpa_perf; - - if (!dev->driver) - return -ENOENT; - - if (list_empty(&mds->pmem_perf_list)) - return -ENOENT; - - dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf, - list); - return sysfs_emit(buf, "%d\n", dpa_perf->qos_class); + return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); } static struct device_attribute dev_attr_pmem_qos_class = @@ -273,11 +253,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) return 0; if (a == &dev_attr_pmem_qos_class.attr) - if (list_empty(&mds->pmem_perf_list)) + if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) return 0; if (a == &dev_attr_ram_qos_class.attr) - if (list_empty(&mds->ram_perf_list)) + if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) return 0; return a->mode; -- GitLab From 10cb393d769bed9473e1fa8d34e4d6d389db9155 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 6 Feb 2024 12:03:38 -0700 Subject: [PATCH 1171/1405] cxl: Remove unnecessary type cast in cxl_qos_class_verify() The passed in host bridge parameter for device_for_each_child() has unnecessary void * type cast. Remove the type cast. Suggested-by: Jonathan Cameron Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20240206190431.1810289-3-dave.jiang@intel.com Reviewed-by: Jonathan Cameron Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index d66acc917dac3..ecbd209ca70a3 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -334,8 +334,7 @@ static int cxl_qos_class_verify(struct cxl_memdev *cxlmd) /* Check to make sure that the device's host bridge is under a root decoder */ rc = device_for_each_child(&root_port->dev, - (void *)cxlmd->endpoint->host_bridge, - match_cxlrd_hb); + cxlmd->endpoint->host_bridge, match_cxlrd_hb); if (!rc) { reset_dpa_perf(&mds->ram_perf); reset_dpa_perf(&mds->pmem_perf); -- GitLab From cc214417f06f6b0a8f6da09220c8bcdb742210b9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 6 Feb 2024 12:03:39 -0700 Subject: [PATCH 1172/1405] cxl: Fix sysfs export of qos_class for memdev Current implementation exports only to /sys/bus/cxl/devices/.../memN/qos_class. With both ram and pmem exposed, the second registered sysfs attribute is rejected as duplicate. It's not possible to create qos_class under the dev_groups via the driver due to the ram and pmem sysfs sub-directories already created by the device sysfs groups. Move the ram and pmem qos_class to the device sysfs groups and add a call to sysfs_update() after the perf data are validated so the qos_class can be visible. The end results should be /sys/bus/cxl/devices/.../memN/ram/qos_class and /sys/bus/cxl/devices/.../memN/pmem/qos_class. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20240206190431.1810289-4-dave.jiang@intel.com Reviewed-by: Jonathan Cameron Signed-off-by: Dan Williams --- drivers/cxl/core/cdat.c | 1 + drivers/cxl/core/memdev.c | 63 +++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/mem.c | 36 ---------------------- 4 files changed, 66 insertions(+), 36 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index ecbd209ca70a3..08fd0baea7a0e 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -382,6 +382,7 @@ void cxl_endpoint_parse_cdat(struct cxl_port *port) cxl_memdev_set_qos_class(cxlds, dsmas_xa); cxl_qos_class_verify(cxlmd); + cxl_memdev_update_perf(cxlmd); } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL); diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index dae8802ecdb01..d4e259f3a7e91 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -447,13 +447,41 @@ static struct attribute *cxl_memdev_attributes[] = { NULL, }; +static ssize_t pmem_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + + return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); +} + +static struct device_attribute dev_attr_pmem_qos_class = + __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); + static struct attribute *cxl_memdev_pmem_attributes[] = { &dev_attr_pmem_size.attr, + &dev_attr_pmem_qos_class.attr, NULL, }; +static ssize_t ram_qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + + return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); +} + +static struct device_attribute dev_attr_ram_qos_class = + __ATTR(qos_class, 0444, ram_qos_class_show, NULL); + static struct attribute *cxl_memdev_ram_attributes[] = { &dev_attr_ram_size.attr, + &dev_attr_ram_qos_class.attr, NULL, }; @@ -477,14 +505,42 @@ static struct attribute_group cxl_memdev_attribute_group = { .is_visible = cxl_memdev_visible, }; +static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + if (a == &dev_attr_ram_qos_class.attr) + if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + return a->mode; +} + static struct attribute_group cxl_memdev_ram_attribute_group = { .name = "ram", .attrs = cxl_memdev_ram_attributes, + .is_visible = cxl_ram_visible, }; +static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + if (a == &dev_attr_pmem_qos_class.attr) + if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) + return 0; + + return a->mode; +} + static struct attribute_group cxl_memdev_pmem_attribute_group = { .name = "pmem", .attrs = cxl_memdev_pmem_attributes, + .is_visible = cxl_pmem_visible, }; static umode_t cxl_memdev_security_visible(struct kobject *kobj, @@ -519,6 +575,13 @@ static const struct attribute_group *cxl_memdev_attribute_groups[] = { NULL, }; +void cxl_memdev_update_perf(struct cxl_memdev *cxlmd) +{ + sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_ram_attribute_group); + sysfs_update_group(&cxlmd->dev.kobj, &cxl_memdev_pmem_attribute_group); +} +EXPORT_SYMBOL_NS_GPL(cxl_memdev_update_perf, CXL); + static const struct device_type cxl_memdev_type = { .name = "cxl_memdev", .release = cxl_memdev_release, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index b6017c0c57b4d..003feebab79b5 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -880,6 +880,8 @@ void cxl_switch_parse_cdat(struct cxl_port *port); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); +void cxl_memdev_update_perf(struct cxl_memdev *cxlmd); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 547f5a145fc54..0c79d9ce877cc 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -215,32 +215,6 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static ssize_t ram_qos_class_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - - return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class); -} - -static struct device_attribute dev_attr_ram_qos_class = - __ATTR(qos_class, 0444, ram_qos_class_show, NULL); - -static ssize_t pmem_qos_class_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - - return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class); -} - -static struct device_attribute dev_attr_pmem_qos_class = - __ATTR(qos_class, 0444, pmem_qos_class_show, NULL); - static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = kobj_to_dev(kobj); @@ -252,21 +226,11 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) mds->poison.enabled_cmds)) return 0; - if (a == &dev_attr_pmem_qos_class.attr) - if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; - - if (a == &dev_attr_ram_qos_class.attr) - if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID) - return 0; - return a->mode; } static struct attribute *cxl_mem_attrs[] = { &dev_attr_trigger_poison_list.attr, - &dev_attr_ram_qos_class.attr, - &dev_attr_pmem_qos_class.attr, NULL }; -- GitLab From 117132edc6900621337b77d29e953d0d01f32a5f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 6 Feb 2024 12:03:40 -0700 Subject: [PATCH 1173/1405] cxl/test: Add support for qos_class checking Set a fake qos_class to a unique value in order to do simple testing of qos_class for root decoders and mem devs via user cxl_test. A mock function is added to set the fake qos_class values for memory device and overrides cxl_endpoint_parse_cdat() in cxl driver code. Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/20240206190431.1810289-5-dave.jiang@intel.com Reviewed-by: Jonathan Cameron Signed-off-by: Dan Williams --- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/cxl.c | 63 ++++++++++++++++++++++++++++++----- tools/testing/cxl/test/mock.c | 14 ++++++++ tools/testing/cxl/test/mock.h | 1 + 4 files changed, 70 insertions(+), 9 deletions(-) diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index caff3834671f9..030b388800f05 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -13,6 +13,7 @@ ldflags-y += --wrap=cxl_hdm_decode_init ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys +ldflags-y += --wrap=cxl_endpoint_parse_cdat DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index a3cdbb2be038c..908e0d0839369 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -15,6 +15,8 @@ static int interleave_arithmetic; +#define FAKE_QTG_ID 42 + #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_SINGLE_HOST 1 #define NR_CXL_RCH 1 @@ -209,7 +211,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 0 }, @@ -224,7 +226,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 1, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -239,7 +241,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 2, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 0 }, @@ -254,7 +256,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 3, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -269,7 +271,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 4, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, }, .target = { 2 }, @@ -284,7 +286,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, - .qtg_id = 5, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, }, .target = { 3 }, @@ -301,7 +303,7 @@ static struct { .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, }, @@ -317,7 +319,7 @@ static struct { .granularity = 0, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 1, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, }, .target = { 0, 1, }, @@ -333,7 +335,7 @@ static struct { .granularity = 0, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, - .qtg_id = 0, + .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 16UL, }, .target = { 0, 1, 0, 1, }, @@ -976,6 +978,48 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } +/* + * Faking the cxl_dpa_perf for the memdev when appropriate. + */ +static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range, + struct cxl_dpa_perf *dpa_perf) +{ + dpa_perf->qos_class = FAKE_QTG_ID; + dpa_perf->dpa_range = *range; + dpa_perf->coord.read_latency = 500; + dpa_perf->coord.write_latency = 500; + dpa_perf->coord.read_bandwidth = 1000; + dpa_perf->coord.write_bandwidth = 1000; +} + +static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + struct cxl_root *cxl_root __free(put_cxl_root) = + find_cxl_root(port); + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct range pmem_range = { + .start = cxlds->pmem_res.start, + .end = cxlds->pmem_res.end, + }; + struct range ram_range = { + .start = cxlds->ram_res.start, + .end = cxlds->ram_res.end, + }; + + if (!cxl_root) + return; + + if (range_len(&ram_range)) + dpa_perf_setup(port, &ram_range, &mds->ram_perf); + + if (range_len(&pmem_range)) + dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); + + cxl_memdev_update_perf(cxlmd); +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -989,6 +1033,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .devm_cxl_setup_hdm = mock_cxl_setup_hdm, .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, + .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1a61e68e30950..6f737941dc0e1 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -285,6 +285,20 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL); +void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + + if (ops && ops->is_mock_dev(cxlmd->dev.parent)) + ops->cxl_endpoint_parse_cdat(port); + else + cxl_endpoint_parse_cdat(port); + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index a94223750346c..d1b0271d28220 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -25,6 +25,7 @@ struct cxl_mock_ops { int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); int (*devm_cxl_enumerate_decoders)( struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); -- GitLab From 0cab687205986491302cd2e440ef1d253031c221 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 16 Feb 2024 17:01:13 +0100 Subject: [PATCH 1174/1405] cxl/pci: Fix disabling memory if DVSEC CXL Range does not match a CFMWS window The Linux CXL subsystem is built on the assumption that HPA == SPA. That is, the host physical address (HPA) the HDM decoder registers are programmed with are system physical addresses (SPA). During HDM decoder setup, the DVSEC CXL range registers (cxl-3.1, 8.1.3.8) are checked if the memory is enabled and the CXL range is in a HPA window that is described in a CFMWS structure of the CXL host bridge (cxl-3.1, 9.18.1.3). Now, if the HPA is not an SPA, the CXL range does not match a CFMWS window and the CXL memory range will be disabled then. The HDM decoder stops working which causes system memory being disabled and further a system hang during HDM decoder initialization, typically when a CXL enabled kernel boots. Prevent a system hang and do not disable the HDM decoder if the decoder's CXL range is not found in a CFMWS window. Note the change only fixes a hardware hang, but does not implement HPA/SPA translation. Support for this can be added in a follow on patch series. Signed-off-by: Robert Richter Fixes: 34e37b4c432c ("cxl/port: Enable HDM Capability after validating DVSEC Ranges") Cc: Link: https://lore.kernel.org/r/20240216160113.407141-1-rrichter@amd.com Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 480489f5644e1..e9e6c81ce034a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -477,9 +477,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, allowed++; } - if (!allowed) { - cxl_set_mem_enable(cxlds, 0); - info->mem_enabled = 0; + if (!allowed && info->mem_enabled) { + dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n"); + return -ENXIO; } /* -- GitLab From 9704669c386f9bbfef2e002e7e690c56b7dcf5de Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 17 Feb 2024 21:25:42 +0900 Subject: [PATCH 1175/1405] tracing/probes: Fix to search structure fields correctly Fix to search a field from the structure which has anonymous union correctly. Since the reference `type` pointer was updated in the loop, the search loop suddenly aborted where it hits an anonymous union. Thus it can not find the field after the anonymous union. This avoids updating the cursor `type` pointer in the loop. Link: https://lore.kernel.org/all/170791694361.389532.10047514554799419688.stgit@devnote2/ Fixes: 302db0f5b3d8 ("tracing/probes: Add a function to search a member of a struct/union") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c index ca224d53bfdcd..5bbdbcbbde3cd 100644 --- a/kernel/trace/trace_btf.c +++ b/kernel/trace/trace_btf.c @@ -91,8 +91,8 @@ const struct btf_member *btf_find_struct_member(struct btf *btf, for_each_member(i, type, member) { if (!member->name_off) { /* Anonymous union/struct: push it for later use */ - type = btf_type_skip_modifiers(btf, member->type, &tid); - if (type && top < BTF_ANON_STACK_MAX) { + if (btf_type_skip_modifiers(btf, member->type, &tid) && + top < BTF_ANON_STACK_MAX) { anon_stack[top].tid = tid; anon_stack[top++].offset = cur_offset + member->offset; -- GitLab From 5928d411557ec5d53832cdd39fc443704a3e5b77 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 17 Feb 2024 13:55:31 +0100 Subject: [PATCH 1176/1405] Documentation: Document the Linux Kernel CVE process The Linux kernel project now has the ability to assign CVEs to fixed issues, so document the process and how individual developers can get a CVE if one is not automatically assigned for their fixes. Reviewed-by: Kees Cook Reviewed-by: Konstantin Ryabitsev Reviewed-by: Krzysztof Kozlowski Reviewed-by: Lukas Bulwahn Signed-off-by: Sasha Levin Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/2024021731-essence-sadness-28fd@gregkh Signed-off-by: Greg Kroah-Hartman --- Documentation/process/cve.rst | 121 ++++++++++++++++++++++++ Documentation/process/index.rst | 1 + Documentation/process/security-bugs.rst | 5 +- MAINTAINERS | 5 + 4 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 Documentation/process/cve.rst diff --git a/Documentation/process/cve.rst b/Documentation/process/cve.rst new file mode 100644 index 0000000000000..5e2753eff7294 --- /dev/null +++ b/Documentation/process/cve.rst @@ -0,0 +1,121 @@ +==== +CVEs +==== + +Common Vulnerabilities and Exposure (CVE®) numbers were developed as an +unambiguous way to identify, define, and catalog publicly disclosed +security vulnerabilities. Over time, their usefulness has declined with +regards to the kernel project, and CVE numbers were very often assigned +in inappropriate ways and for inappropriate reasons. Because of this, +the kernel development community has tended to avoid them. However, the +combination of continuing pressure to assign CVEs and other forms of +security identifiers, and ongoing abuses by individuals and companies +outside of the kernel community has made it clear that the kernel +community should have control over those assignments. + +The Linux kernel developer team does have the ability to assign CVEs for +potential Linux kernel security issues. This assignment is independent +of the :doc:`normal Linux kernel security bug reporting +process<../process/security-bugs>`. + +A list of all assigned CVEs for the Linux kernel can be found in the +archives of the linux-cve mailing list, as seen on +https://lore.kernel.org/linux-cve-announce/. To get notice of the +assigned CVEs, please `subscribe +`_ to that mailing list. + +Process +======= + +As part of the normal stable release process, kernel changes that are +potentially security issues are identified by the developers responsible +for CVE number assignments and have CVE numbers automatically assigned +to them. These assignments are published on the linux-cve-announce +mailing list as announcements on a frequent basis. + +Note, due to the layer at which the Linux kernel is in a system, almost +any bug might be exploitable to compromise the security of the kernel, +but the possibility of exploitation is often not evident when the bug is +fixed. Because of this, the CVE assignment team is overly cautious and +assign CVE numbers to any bugfix that they identify. This +explains the seemingly large number of CVEs that are issued by the Linux +kernel team. + +If the CVE assignment team misses a specific fix that any user feels +should have a CVE assigned to it, please email them at +and the team there will work with you on it. Note that no potential +security issues should be sent to this alias, it is ONLY for assignment +of CVEs for fixes that are already in released kernel trees. If you +feel you have found an unfixed security issue, please follow the +:doc:`normal Linux kernel security bug reporting +process<../process/security-bugs>`. + +No CVEs will be automatically assigned for unfixed security issues in +the Linux kernel; assignment will only automatically happen after a fix +is available and applied to a stable kernel tree, and it will be tracked +that way by the git commit id of the original fix. If anyone wishes to +have a CVE assigned before an issue is resolved with a commit, please +contact the kernel CVE assignment team at to get an +identifier assigned from their batch of reserved identifiers. + +No CVEs will be assigned for any issue found in a version of the kernel +that is not currently being actively supported by the Stable/LTS kernel +team. A list of the currently supported kernel branches can be found at +https://kernel.org/releases.html + +Disputes of assigned CVEs +========================= + +The authority to dispute or modify an assigned CVE for a specific kernel +change lies solely with the maintainers of the relevant subsystem +affected. This principle ensures a high degree of accuracy and +accountability in vulnerability reporting. Only those individuals with +deep expertise and intimate knowledge of the subsystem can effectively +assess the validity and scope of a reported vulnerability and determine +its appropriate CVE designation. Any attempt to modify or dispute a CVE +outside of this designated authority could lead to confusion, inaccurate +reporting, and ultimately, compromised systems. + +Invalid CVEs +============ + +If a security issue is found in a Linux kernel that is only supported by +a Linux distribution due to the changes that have been made by that +distribution, or due to the distribution supporting a kernel version +that is no longer one of the kernel.org supported releases, then a CVE +can not be assigned by the Linux kernel CVE team, and must be asked for +from that Linux distribution itself. + +Any CVE that is assigned against the Linux kernel for an actively +supported kernel version, by any group other than the kernel assignment +CVE team should not be treated as a valid CVE. Please notify the +kernel CVE assignment team at so that they can work to +invalidate such entries through the CNA remediation process. + +Applicability of specific CVEs +============================== + +As the Linux kernel can be used in many different ways, with many +different ways of accessing it by external users, or no access at all, +the applicability of any specific CVE is up to the user of Linux to +determine, it is not up to the CVE assignment team. Please do not +contact us to attempt to determine the applicability of any specific +CVE. + +Also, as the source tree is so large, and any one system only uses a +small subset of the source tree, any users of Linux should be aware that +large numbers of assigned CVEs are not relevant for their systems. + +In short, we do not know your use case, and we do not know what portions +of the kernel that you use, so there is no way for us to determine if a +specific CVE is relevant for your system. + +As always, it is best to take all released kernel changes, as they are +tested together in a unified whole by many community members, and not as +individual cherry-picked changes. Also note that for many bugs, the +solution to the overall problem is not found in a single change, but by +the sum of many fixes on top of each other. Ideally CVEs will be +assigned to all fixes for all issues, but sometimes we will fail to +notice fixes, therefore assume that some changes without a CVE assigned +might be relevant to take. + diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 6cb732dfcc722..de9cbb7bd7eb2 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -81,6 +81,7 @@ of special classes of bugs: regressions and security problems. handling-regressions security-bugs + cve embargoed-hardware-issues Maintainer information diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 692a3ba56cca8..56c560a00b37a 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -99,9 +99,8 @@ CVE assignment The security team does not assign CVEs, nor do we require them for reports or fixes, as this can needlessly complicate the process and may delay the bug handling. If a reporter wishes to have a CVE identifier -assigned, they should find one by themselves, for example by contacting -MITRE directly. However under no circumstances will a patch inclusion -be delayed to wait for a CVE identifier to arrive. +assigned for a confirmed issue, they can contact the :doc:`kernel CVE +assignment team<../process/cve>` to obtain one. Non-disclosure agreements ------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 8999497011a26..4423fdfdd8ae2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5610,6 +5610,11 @@ S: Maintained F: Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml F: drivers/net/can/ctucanfd/ +CVE ASSIGNMENT CONTACT +M: CVE Assignment Team +S: Maintained +F: Documentation/process/cve.rst + CW1200 WLAN driver S: Orphan F: drivers/net/wireless/st/cw1200/ -- GitLab From 97dde84026339e4b4af9a6301f825d1828d7874b Mon Sep 17 00:00:00 2001 From: Pavel Sakharov Date: Wed, 14 Feb 2024 12:27:17 +0300 Subject: [PATCH 1177/1405] net: stmmac: Fix incorrect dereference in interrupt handlers If 'dev' or 'data' is NULL, the 'priv' variable has an incorrect address when dereferencing calling netdev_err(). Since we get as 'dev_id' or 'data' what was passed as the 'dev' argument to request_irq() during interrupt initialization (that is, the net_device and rx/tx queue pointers initialized at the time of the call) and since there are usually no checks for the 'dev_id' argument in such handlers in other drivers, remove these checks from the handlers in stmmac driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX") Signed-off-by: Pavel Sakharov Reviewed-by: Serge Semin Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 75d0297045032..e80d77bd9f1f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6059,11 +6059,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6079,11 +6074,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6105,11 +6095,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6136,11 +6121,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; -- GitLab From 081a0e3b0d4c061419d3f4679dec9f68725b17e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:06 +0000 Subject: [PATCH 1178/1405] ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid net->dev_base_seq and ipv4.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 0465277f6b3f ("ipv4: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ca0ff15dc8fa3..bc74f131fe4df 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1825,6 +1825,21 @@ static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; -- GitLab From e898e4cd1aab271ca414f9ac6e08e4c761f6913c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Feb 2024 17:21:07 +0000 Subject: [PATCH 1179/1405] ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid net->dev_base_seq and ipv6.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 63998ac24f83 ("ipv6: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Signed-off-by: Eric Dumazet Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733ace18806c6..5a839c5fb1a5a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -708,6 +708,22 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; -- GitLab From 6c347be62ae963b301ead8e7fa7b9973e6e0d6e1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:28 +0100 Subject: [PATCH 1180/1405] mptcp: add needs_id for userspace appending addr When userspace PM requires to create an ID 0 subflow in "userspace pm create id 0 subflow" test like this: userspace_pm_add_sf $ns2 10.0.3.2 0 An ID 1 subflow, in fact, is created. Since in mptcp_pm_nl_append_new_local_addr(), 'id 0' will be treated as no ID is set by userspace, and will allocate a new ID immediately: if (!e->addr.id) e->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); To solve this issue, a new parameter needs_id is added for mptcp_userspace_pm_append_new_local_addr() to distinguish between whether userspace PM has set an ID 0 or whether userspace PM has not set any address. needs_id is true in mptcp_userspace_pm_get_local_id(), but false in mptcp_pm_nl_announce_doit() and mptcp_pm_nl_subflow_create_doit(). Fixes: e5ed101a6028 ("mptcp: userspace pm allow creating id 0 subflow") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 4f3901d5b8ef8..e582b3b2d174d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -153,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) @@ -198,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -378,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; -- GitLab From 584f3894262634596532cf43a5e782e34a0ce374 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 15 Feb 2024 19:25:29 +0100 Subject: [PATCH 1181/1405] mptcp: add needs_id for netlink appending addr Just the same as userspace PM, a new parameter needs_id is added for in-kernel PM mptcp_pm_nl_append_new_local_addr() too. Add a new helper mptcp_pm_has_addr_attr_id() to check whether an address ID is set from PM or not. In mptcp_pm_nl_get_local_id(), needs_id is always true, but in mptcp_pm_nl_add_addr_doit(), pass mptcp_pm_has_addr_attr_id() to needs_it. Fixes: efd5a4c04e18 ("mptcp: add the address ID assignment bitmap") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 287a60381eae6..a24c9128dee9e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -901,7 +901,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -949,7 +950,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -960,7 +961,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1092,7 +1093,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1285,6 +1286,18 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_address_nl_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; @@ -1326,7 +1339,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; -- GitLab From b8adb69a7d29c2d33eb327bca66476fb6066516b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: [PATCH 1182/1405] mptcp: fix lockless access in subflow ULP diag Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +++++- net/tls/tls_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a11810310..f6eba9652d010 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f28..e57c5f47f0351 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 1c2c6800949dd..b4674f03d71a9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; -- GitLab From a7cfe776637004a4c938fde78be4bd608c32c3ef Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:31 +0100 Subject: [PATCH 1183/1405] mptcp: fix data races on local_id The local address id is accessed lockless by the NL PM, add all the required ONCE annotation. There is a caveat: the local id can be initialized late in the subflow life-cycle, and its validity is controlled by the local_id_valid flag. Remove such flag and encode the validity in the local_id field itself with negative value before initialization. That allows accessing the field consistently with a single read operation. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/diag.c | 2 +- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 15 ++++++++++++--- net/mptcp/subflow.c | 9 +++++---- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f0351..6ff6f14674aa2 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -65,7 +65,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a24c9128dee9e..912e25077437e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -800,7 +800,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) continue; @@ -809,7 +809,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, + i, rm_id, id, subflow->remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -1994,7 +1994,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index e582b3b2d174d..d396a5973429b 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -234,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->local_id == 0) { + if (READ_ONCE(subflow->local_id) == 0) { has_id_0 = true; break; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8ef2927ebca29..948606a537daf 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -85,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); iput(SOCK_INODE(ssock)); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ed50f2015dc38..631a7f445f349 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -491,10 +491,9 @@ struct mptcp_subflow_context { remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 9; + __unused : 10; bool data_avail; bool scheduled; u32 remote_nonce; @@ -505,7 +504,7 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -1022,6 +1022,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c34ecadee1200..015184bbf06c2 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -577,8 +577,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -587,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1731,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1966,7 +1967,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; -- GitLab From 967d3c27127e71a10ff5c083583a038606431b61 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:32 +0100 Subject: [PATCH 1184/1405] mptcp: fix data races on remote_id Similar to the previous patch, address the data race on remote_id, adding the suitable ONCE annotations. Fixes: bedee0b56113 ("mptcp: address lookup improvements") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/subflow.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 912e25077437e..ed6983af1ab2b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -443,7 +443,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; @@ -799,18 +799,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 015184bbf06c2..71ba86246ff89 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -535,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -1567,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; @@ -1974,7 +1974,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; -- GitLab From 045e9d812868a2d80b7a57b224ce8009444b7bbc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Feb 2024 19:25:33 +0100 Subject: [PATCH 1185/1405] mptcp: fix duplicate subflow creation Fullmesh endpoints could end-up unexpectedly generating duplicate subflows - same local and remote addresses - when multiple incoming ADD_ADDR are processed before the PM creates the subflow for the local endpoints. Address the issue explicitly checking for duplicates at subflow creation time. To avoid a quadratic computational complexity, track the unavailable remote address ids in a temporary bitmap and initialize such bitmap with the remote ids of all the existing subflows matching the local address currently processed. The above allows additionally replacing the existing code checking for duplicate entry in the current set with a simple bit test operation. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435 Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ed6983af1ab2b..58d17d9604e78 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ @@ -440,6 +427,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); @@ -447,11 +444,17 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, if (deny_id0 && !addrs[i].id) continue; + if (test_bit(addrs[i].id, unavail_id)) + continue; + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } -- GitLab From d2a2547565a9f1ad7989f7e21f97cbf065a9390d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:34 +0100 Subject: [PATCH 1186/1405] selftests: mptcp: pm nl: also list skipped tests If the feature is not supported by older kernels, and instead of just ignoring some tests, we should mark them as skipped, so we can still track them. Fixes: d85555ac11f9 ("selftests: mptcp: pm_netlink: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 8f4ff123a7eb9..79e83a2c95ded 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -194,6 +194,12 @@ subflow 10.0.1.1" " (nofullmesh)" ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +else + for st in fullmesh nofullmesh backup,fullmesh; do + st=" (${st})" + printf "%-50s%s\n" "${st}" "[SKIP]" + mptcp_lib_result_skip "${st}" + done fi mptcp_lib_result_print_all_tap -- GitLab From 662f084f3396d8a804d56cb53ac05c9e39902a7b Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:35 +0100 Subject: [PATCH 1187/1405] selftests: mptcp: pm nl: avoid error msg on older kernels Since the 'Fixes' commit mentioned below, and if the kernel being tested doesn't support the 'fullmesh' flag, this error will be printed: netlink error -22 (Invalid argument) ./pm_nl_ctl: bailing out due to netlink error[s] But that can be normal if the kernel doesn't support the feature, no need to print this worrying error message while everything else looks OK. So we can mute stderr. Failures will still be detected if any. Fixes: 1dc88d241f92 ("selftests: mptcp: pm_nl_ctl: always look for errors") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 79e83a2c95ded..71899a3ffa7a9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -183,7 +183,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nobackup)" # fullmesh support has been added later -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh +ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || mptcp_lib_expect_all_features; then check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -- GitLab From 694bd45980a61045eb5ec07799e3b94c76db830e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:36 +0100 Subject: [PATCH 1188/1405] selftests: mptcp: diag: fix bash warnings on older kernels Since the 'Fixes' commit mentioned below, the command that is executed in __chk_nr() helper can return nothing if the feature is not supported. This is the case when the MPTCP CURRESTAB counter is not supported. To avoid this warning ... ./diag.sh: line 65: [: !=: unary operator expected ... we just need to surround '$nr' with double quotes, to support an empty string when the feature is not supported. Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 04fcb8a077c99..e0615c6ffb8d1 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -62,8 +62,8 @@ __chk_nr() nr=$(eval $command) printf "%-50s" "$msg" - if [ $nr != $expected ]; then - if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then + if [ "$nr" != "$expected" ]; then + if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then echo "[ skip ] Feature probably not supported" mptcp_lib_result_skip "${msg}" else -- GitLab From 4d8e0dde0403b5a86aa83e243f020711a9c3e31f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:37 +0100 Subject: [PATCH 1189/1405] selftests: mptcp: simult flows: fix some subtest names The selftest was correctly recording all the results, but the 'reverse direction' part was missing in the name when needed. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 0cc964e6f2c17..8f9ddb3ad4fe8 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -250,7 +250,8 @@ run_test() [ $bail -eq 0 ] || exit $ret fi - printf "%-60s" "$msg - reverse direction" + msg+=" - reverse direction" + printf "%-60s" "${msg}" do_transfer $large $small $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" -- GitLab From 2ef0d804c090658960c008446523863fd7e3541e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:38 +0100 Subject: [PATCH 1190/1405] selftests: mptcp: userspace_pm: unique subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated names. Some subtests from the userspace_pm selftest had the same names. That's because different subflows are created (and deleted) between the same pair of IP addresses. Simply adding the destination port in the name is then enough to have different names, because the destination port is always different. Note that adding such info takes a bit more space, so we need to increase a bit the width to print the name, simply to keep all the '[ OK ]' aligned as before. Fixes: f589234e1af0 ("selftests: mptcp: userspace_pm: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 6167837f48e17..1b94a75604fee 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -75,7 +75,7 @@ print_test() { test_name="${1}" - _printf "%-63s" "${test_name}" + _printf "%-68s" "${test_name}" } print_results() @@ -542,7 +542,7 @@ verify_subflow_events() local remid local info - info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})" + info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then -- GitLab From 645c1dc965ef6b5554e5e69737bb179c7a0f872f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:39 +0100 Subject: [PATCH 1191/1405] selftests: mptcp: diag: unique 'in use' subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'in use' subtests from the diag selftest had the same names, e.g.: chk 0 msk in use after flush Now the previous value is taken, to have different names, e.g.: chk 2->0 msk in use after flush While at it, avoid repeating the full message, declare it once in the helper. Fixes: ce9902573652 ("selftests: mptcp: diag: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index e0615c6ffb8d1..266656a16229d 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -166,9 +166,13 @@ chk_msk_listen() chk_msk_inuse() { local expected=$1 - local msg="$2" + local msg="....chk ${2:-${expected}} msk in use" local listen_nr + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) expected=$((expected + listen_nr)) @@ -179,7 +183,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected "$msg" 0 + __chk_nr get_msk_inuse $expected "${msg}" 0 } # $1: cestab nr @@ -227,11 +231,11 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" -chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_inuse 2 chk_msk_cestab 2 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "2->0" chk_msk_cestab 0 echo "a" | \ @@ -247,11 +251,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" -chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_inuse 1 chk_msk_cestab 1 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "1->0" chk_msk_cestab 0 NR_CLIENTS=100 @@ -273,11 +277,11 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" -chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_inuse $((NR_CLIENTS*2)) "many" chk_msk_cestab $((NR_CLIENTS*2)) flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "many->0" chk_msk_cestab 0 mptcp_lib_result_print_all_tap -- GitLab From 4103d8480866fe5abb71ef0ed8af3a3b7b9625bf Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Thu, 15 Feb 2024 19:25:40 +0100 Subject: [PATCH 1192/1405] selftests: mptcp: diag: unique 'cestab' subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'cestab' subtests from the diag selftest had the same names, e.g.: ....chk 0 cestab Now the previous value is taken, to have different names, e.g.: ....chk 2->0 cestab after flush While at it, the 'after flush' info is added, similar to what is done with the 'in use' subtests. Also inspired by these 'in use' subtests, 'many' is displayed instead of a large number: many msk socket present [ ok ] ....chk many msk in use [ ok ] ....chk many cestab [ ok ] ....chk many->0 msk in use after flush [ ok ] ....chk many->0 cestab after flush [ ok ] Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 266656a16229d..0a58ebb8b04c9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -189,10 +189,15 @@ chk_msk_inuse() # $1: cestab nr chk_msk_cestab() { - local cestab=$1 + local expected=$1 + local msg="....chk ${2:-${expected}} cestab" + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ - "${cestab}" "....chk ${cestab} cestab" "" + "${expected}" "${msg}" "" } wait_connected() @@ -236,7 +241,7 @@ chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "2->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "2->0" echo "a" | \ timeout ${timeout_test} \ @@ -256,7 +261,7 @@ chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "1->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "1->0" NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -278,11 +283,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "many" -chk_msk_cestab $((NR_CLIENTS*2)) +chk_msk_cestab $((NR_CLIENTS*2)) "many" flush_pids chk_msk_inuse 0 "many->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "many->0" mptcp_lib_result_print_all_tap exit $ret -- GitLab From 5b76d928f8b779a1b19c5842e7cabee4cbb610c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 15 Feb 2024 10:27:31 -0800 Subject: [PATCH 1193/1405] net: bcmasp: Indicate MAC is in charge of PHY PM Avoid the PHY library call unnecessarily into the suspend/resume functions by setting phydev->mac_managed_pm to true. The ASP driver essentially does exactly what mdio_bus_phy_resume() does. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Florian Fainelli Signed-off-by: Justin Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index f59557b0cd515..6ad1366270f79 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -1050,6 +1050,9 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) netdev_err(dev, "could not attach to PHY\n"); goto err_phy_disable; } + + /* Indicate that the MAC is responsible for PHY PM */ + phydev->mac_managed_pm = true; } else if (!intf->wolopts) { ret = phy_resume(dev->phydev); if (ret) -- GitLab From f120e62e37f0af4c4cbe08e5a88ea60a6a17c858 Mon Sep 17 00:00:00 2001 From: Justin Chen Date: Thu, 15 Feb 2024 10:27:32 -0800 Subject: [PATCH 1194/1405] net: bcmasp: Sanity check is off by one A sanity check for OOB write is off by one leading to a false positive when the array is full. Fixes: 9b90aca97f6d ("net: ethernet: bcmasp: fix possible OOB write in bcmasp_netfilt_get_all_active()") Signed-off-by: Justin Chen Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/asp2/bcmasp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 29b04a274d077..80245c65cc904 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -535,9 +535,6 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, int j = 0, i; for (i = 0; i < NUM_NET_FILTERS; i++) { - if (j == *rule_cnt) - return -EMSGSIZE; - if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -547,6 +544,9 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, priv->net_filters[i - 1].wake_filter) continue; + if (j == *rule_cnt) + return -EMSGSIZE; + rule_locs[j++] = priv->net_filters[i].fs.location; } -- GitLab From b401b621758e46812da61fa58a67c3fd8d91de0d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Feb 2024 12:56:25 -0800 Subject: [PATCH 1195/1405] Linux 6.8-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ed80c7d98a7ed..41fa8a2565f54 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- GitLab From a5c57fd2e9bd1c8ea8613a8f94fd0be5eccbf321 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 15 Feb 2024 16:18:33 -0600 Subject: [PATCH 1196/1405] powerpc/pseries/iommu: DLPAR add doesn't completely initialize pci_controller When a PCI device is dynamically added, the kernel oopses with a NULL pointer dereference: BUG: Kernel NULL pointer dereference on read at 0x00000030 Faulting instruction address: 0xc0000000006bbe5c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs xsk_diag bonding nft_compat nf_tables nfnetlink rfkill binfmt_misc dm_multipath rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c mlx5_core mlxfw sd_mod t10_pi sg tls ibmvscsi ibmveth scsi_transport_srp vmx_crypto pseries_wdt psample dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 17 PID: 2685 Comm: drmgr Not tainted 6.7.0-203405+ #66 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c0000000006bbe5c LR: c000000000a13e68 CTR: c0000000000579f8 REGS: c00000009924f240 TRAP: 0300 Not tainted (6.7.0-203405+) MSR: 8000000000009033 CR: 24002220 XER: 20040006 CFAR: c000000000a13e64 DAR: 0000000000000030 DSISR: 40000000 IRQMASK: 0 ... NIP sysfs_add_link_to_group+0x34/0x94 LR iommu_device_link+0x5c/0x118 Call Trace: iommu_init_device+0x26c/0x318 (unreliable) iommu_device_link+0x5c/0x118 iommu_init_device+0xa8/0x318 iommu_probe_device+0xc0/0x134 iommu_bus_notifier+0x44/0x104 notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 bus_notify+0x50/0x7c device_add+0x640/0x918 pci_device_add+0x23c/0x298 of_create_pci_dev+0x400/0x884 of_scan_pci_dev+0x124/0x1b0 __of_scan_bus+0x78/0x18c pcibios_scan_phb+0x2a4/0x3b0 init_phb_dynamic+0xb8/0x110 dlpar_add_slot+0x170/0x3b8 [rpadlpar_io] add_slot_store.part.0+0xb4/0x130 [rpadlpar_io] kobj_attr_store+0x2c/0x48 sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x350/0x4a0 ksys_write+0x84/0x140 system_call_exception+0x124/0x330 system_call_vectored_common+0x15c/0x2ec Commit a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") broke DLPAR add of PCI devices. The above added iommu_device structure to pci_controller. During system boot, PCI devices are discovered and this newly added iommu_device structure is initialized by a call to iommu_device_register(). During DLPAR add of a PCI device, a new pci_controller structure is allocated but there are no calls made to iommu_device_register() interface. Fix is to register the iommu device during DLPAR add as well. Fixes: a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") Signed-off-by: Gaurav Batra Reviewed-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240215221833.4817-1-gbatra@linux.ibm.com --- arch/powerpc/include/asm/ppc-pci.h | 10 ++++++++++ arch/powerpc/kernel/iommu.c | 23 ++++++++++++++++------ arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ce2b1b5eebddc..a8b7e8682f5bd 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -30,6 +30,16 @@ void *pci_traverse_device_nodes(struct device_node *start, void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \ + defined(CONFIG_PPC_POWERNV)) +extern void ppc_iommu_register_device(struct pci_controller *phb); +extern void ppc_iommu_unregister_device(struct pci_controller *phb); +#else +static inline void ppc_iommu_register_device(struct pci_controller *phb) { } +static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { } +#endif + + /* From rtas_pci.h */ extern void init_pci_config_tokens (void); extern unsigned long get_phb_buid (struct device_node *); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index df17b33b89d13..2c0173e7094da 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1341,7 +1341,7 @@ static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) struct pci_controller *hose; if (!dev_is_pci(dev)) - return ERR_PTR(-EPERM); + return ERR_PTR(-ENODEV); pdev = to_pci_dev(dev); hose = pdev->bus->sysdata; @@ -1390,6 +1390,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1400,11 +1415,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - iommu_device_sysfs_add(&hose->iommu, hose->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - hose->global_number); - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, - hose->parent); + ppc_iommu_register_device(hose); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4ba8245681192..4448386268d99 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); + ppc_iommu_register_device(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + ppc_iommu_unregister_device(phb); + pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ -- GitLab From d70c7a6614ac2e1340a9fed044aec04d695c6645 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Mon, 29 Jan 2024 08:50:56 +0100 Subject: [PATCH 1197/1405] usb: uhci-grlib: Explicitly include linux/platform_device.h This fixes relying upon linux/of_platform.h to include linux/platform_device.h, which it no longer does, thereby fixing compilation problems like: In file included from drivers/usb/host/uhci-hcd.c:850: drivers/usb/host/uhci-grlib.c: In function 'uhci_hcd_grlib_probe': drivers/usb/host/uhci-grlib.c:92:29: error: invalid use of undefined type 'struct platform_device' 92 | struct device_node *dn = op->dev.of_node; | ^~ Fixes: ef175b29a242 ("of: Stop circularly including of_device.h and of_platform.h") Signed-off-by: Andreas Larsson Link: https://lore.kernel.org/r/20240129075056.1511630-1-andreas@gaisler.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/uhci-grlib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c index ac3fc59703157..cfebb833668e4 100644 --- a/drivers/usb/host/uhci-grlib.c +++ b/drivers/usb/host/uhci-grlib.c @@ -22,6 +22,7 @@ #include #include #include +#include static int uhci_grlib_init(struct usb_hcd *hcd) { -- GitLab From 18a6be674306c9acb05c08e5c3fd376ef50a917c Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 6 Feb 2024 11:40:18 +0100 Subject: [PATCH 1198/1405] usb: cdnsp: blocked some cdns3 specific code host.c file has some parts of code that were introduced for CDNS3 driver and should not be used with CDNSP driver. This patch blocks using these parts of codes by CDNSP driver. These elements include: - xhci_plat_cdns3_xhci object - cdns3 specific XECP_PORT_CAP_REG register - cdns3 specific XECP_AUX_CTRL_REG1 register cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240206104018.48272-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 6164fc4c96a49..ceca4d839dfd4 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -18,6 +18,11 @@ #include "../host/xhci.h" #include "../host/xhci-plat.h" +/* + * The XECP_PORT_CAP_REG and XECP_AUX_CTRL_REG1 exist only + * in Cadence USB3 dual-role controller, so it can't be used + * with Cadence CDNSP dual-role controller. + */ #define XECP_PORT_CAP_REG 0x8000 #define XECP_AUX_CTRL_REG1 0x8120 @@ -57,6 +62,8 @@ static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .resume_quirk = xhci_cdns3_resume_quirk, }; +static const struct xhci_plat_priv xhci_plat_cdnsp_xhci; + static int __cdns_host_init(struct cdns *cdns) { struct platform_device *xhci; @@ -81,8 +88,13 @@ static int __cdns_host_init(struct cdns *cdns) goto err1; } - cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, - sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (cdns->version < CDNSP_CONTROLLER_V2) + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdns3_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + else + cdns->xhci_plat_data = kmemdup(&xhci_plat_cdnsp_xhci, + sizeof(struct xhci_plat_priv), GFP_KERNEL); + if (!cdns->xhci_plat_data) { ret = -ENOMEM; goto err1; -- GitLab From 47625b018c6bc788bc10dd654c82696eb0a5ef11 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 15 Feb 2024 13:16:09 +0100 Subject: [PATCH 1199/1405] usb: cdnsp: fixed issue with incorrect detecting CDNSP family controllers Cadence have several controllers from 0x000403xx family but current driver suuport detecting only one with DID equal 0x0004034E. It causes that if someone uses different CDNSP controller then driver will use incorrect version and register space. Patch fix this issue. cc: stable@vger.kernel.org Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20240215121609.259772-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/core.c | 1 - drivers/usb/cdns3/drd.c | 13 +++++++++---- drivers/usb/cdns3/drd.h | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 33548771a0d3a..465e9267b49c1 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -395,7 +395,6 @@ static int cdns_role_set(struct usb_role_switch *sw, enum usb_role role) return ret; } - /** * cdns_wakeup_irq - interrupt handler for wakeup events * @irq: irq number for cdns3/cdnsp core device diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 04b6d12f2b9a3..ee917f1b091c8 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -156,7 +156,8 @@ bool cdns_is_device(struct cdns *cdns) */ static void cdns_otg_disable_irq(struct cdns *cdns) { - writel(0, &cdns->otg_irq_regs->ien); + if (cdns->version) + writel(0, &cdns->otg_irq_regs->ien); } /** @@ -422,15 +423,20 @@ int cdns_drd_init(struct cdns *cdns) cdns->otg_regs = (void __iomem *)&cdns->otg_v1_regs->cmd; - if (readl(&cdns->otg_cdnsp_regs->did) == OTG_CDNSP_DID) { + state = readl(&cdns->otg_cdnsp_regs->did); + + if (OTG_CDNSP_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_cdnsp_regs->ien; cdns->version = CDNSP_CONTROLLER_V2; - } else { + } else if (OTG_CDNS3_CHECK_DID(state)) { cdns->otg_irq_regs = (struct cdns_otg_irq_regs __iomem *) &cdns->otg_v1_regs->ien; writel(1, &cdns->otg_v1_regs->simulate); cdns->version = CDNS3_CONTROLLER_V1; + } else { + dev_err(cdns->dev, "not supporte DID=0x%08x\n", state); + return -EINVAL; } dev_dbg(cdns->dev, "DRD version v1 (ID: %08x, rev: %08x)\n", @@ -483,7 +489,6 @@ int cdns_drd_exit(struct cdns *cdns) return 0; } - /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { diff --git a/drivers/usb/cdns3/drd.h b/drivers/usb/cdns3/drd.h index cbdf94f73ed91..d72370c321d39 100644 --- a/drivers/usb/cdns3/drd.h +++ b/drivers/usb/cdns3/drd.h @@ -79,7 +79,11 @@ struct cdnsp_otg_regs { __le32 susp_timing_ctrl; }; -#define OTG_CDNSP_DID 0x0004034E +/* CDNSP driver supports 0x000403xx Cadence USB controller family. */ +#define OTG_CDNSP_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040300) + +/* CDNS3 driver supports 0x000402xx Cadence USB controller family. */ +#define OTG_CDNS3_CHECK_DID(did) (((did) & GENMASK(31, 8)) == 0x00040200) /* * Common registers interface for both CDNS3 and CDNSP version of DRD. -- GitLab From 1c9be13846c0b2abc2480602f8ef421360e1ad9e Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:38 +0800 Subject: [PATCH 1200/1405] usb: roles: fix NULL pointer issue when put module's reference In current design, usb role class driver will get usb_role_switch parent's module reference after the user get usb_role_switch device and put the reference after the user put the usb_role_switch device. However, the parent device of usb_role_switch may be removed before the user put the usb_role_switch. If so, then, NULL pointer issue will be met when the user put the parent module's reference. This will save the module pointer in structure of usb_role_switch. Then, we don't need to find module by iterating long relations. Fixes: 5c54fcac9a9d ("usb: roles: Take care of driver module reference counting") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index ae41578bd0149..2bad038fb9ada 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -21,6 +21,7 @@ static const struct class role_class = { struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -135,7 +136,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -157,7 +158,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -189,15 +190,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(&role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -338,6 +342,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = &role_class; -- GitLab From b787a3e781759026a6212736ef8e52cf83d1821a Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:39 +0800 Subject: [PATCH 1201/1405] usb: roles: don't get/set_role() when usb_role_switch is unregistered There is a possibility that usb_role_switch device is unregistered before the user put usb_role_switch. In this case, the user may still want to get/set_role() since the user can't sense the changes of usb_role_switch. This will add a flag to show if usb_role_switch is already registered and avoid unwanted behaviors. Fixes: fde0aa6c175a ("usb: common: Small class for USB role switches") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-2-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/roles/class.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 2bad038fb9ada..70165dd86b5de 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -23,6 +23,7 @@ struct usb_role_switch { struct mutex lock; /* device lock*/ struct module *module; /* the module this device depends on */ enum usb_role role; + bool registered; /* From descriptor */ struct device *usb2_port; @@ -49,6 +50,9 @@ int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role) if (IS_ERR_OR_NULL(sw)) return 0; + if (!sw->registered) + return -EOPNOTSUPP; + mutex_lock(&sw->lock); ret = sw->set(sw, role); @@ -74,7 +78,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw) { enum usb_role role; - if (IS_ERR_OR_NULL(sw)) + if (IS_ERR_OR_NULL(sw) || !sw->registered) return USB_ROLE_NONE; mutex_lock(&sw->lock); @@ -357,6 +361,8 @@ usb_role_switch_register(struct device *parent, return ERR_PTR(ret); } + sw->registered = true; + /* TODO: Symlinks for the host port and the device controller. */ return sw; @@ -371,8 +377,10 @@ EXPORT_SYMBOL_GPL(usb_role_switch_register); */ void usb_role_switch_unregister(struct usb_role_switch *sw) { - if (!IS_ERR_OR_NULL(sw)) + if (!IS_ERR_OR_NULL(sw)) { + sw->registered = false; device_unregister(&sw->dev); + } } EXPORT_SYMBOL_GPL(usb_role_switch_unregister); -- GitLab From cd45f99034b0c8c9cb346dd0d6407a95ca3d36f6 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:16 -0500 Subject: [PATCH 1202/1405] usb: cdns3: fixed memory use after free at cdns3_gadget_ep_disable() ... cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); list_del_init(&priv_req->list); ... 'priv_req' actually free at cdns3_gadget_ep_free_request(). But list_del_init() use priv_req->list after it. [ 1542.642868][ T534] BUG: KFENCE: use-after-free read in __list_del_entry_valid+0x10/0xd4 [ 1542.642868][ T534] [ 1542.653162][ T534] Use-after-free read at 0x000000009ed0ba99 (in kfence-#3): [ 1542.660311][ T534] __list_del_entry_valid+0x10/0xd4 [ 1542.665375][ T534] cdns3_gadget_ep_disable+0x1f8/0x388 [cdns3] [ 1542.671571][ T534] usb_ep_disable+0x44/0xe4 [ 1542.675948][ T534] ffs_func_eps_disable+0x64/0xc8 [ 1542.680839][ T534] ffs_func_set_alt+0x74/0x368 [ 1542.685478][ T534] ffs_func_disable+0x18/0x28 Move list_del_init() before cdns3_gadget_ep_free_request() to resolve this problem. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index aeca902ab6cc4..d6723d31fc6e2 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2540,11 +2540,11 @@ static int cdns3_gadget_ep_disable(struct usb_ep *ep) while (!list_empty(&priv_ep->wa2_descmiss_req_list)) { priv_req = cdns3_next_priv_request(&priv_ep->wa2_descmiss_req_list); + list_del_init(&priv_req->list); kfree(priv_req->request.buf); cdns3_gadget_ep_free_request(&priv_ep->endpoint, &priv_req->request); - list_del_init(&priv_req->list); --priv_ep->wa2_counter; } -- GitLab From 5fd9e45f1ebcd57181358af28506e8a661a260b3 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Fri, 2 Feb 2024 10:42:17 -0500 Subject: [PATCH 1203/1405] usb: cdns3: fix memory double free when handle zero packet 829 if (request->complete) { 830 spin_unlock(&priv_dev->lock); 831 usb_gadget_giveback_request(&priv_ep->endpoint, 832 request); 833 spin_lock(&priv_dev->lock); 834 } 835 836 if (request->buf == priv_dev->zlp_buf) 837 cdns3_gadget_ep_free_request(&priv_ep->endpoint, request); Driver append an additional zero packet request when queue a packet, which length mod max packet size is 0. When transfer complete, run to line 831, usb_gadget_giveback_request() will free this requestion. 836 condition is true, so cdns3_gadget_ep_free_request() free this request again. Log: [ 1920.140696][ T150] BUG: KFENCE: use-after-free read in cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.140696][ T150] [ 1920.151837][ T150] Use-after-free read at 0x000000003d1cd10b (in kfence-#36): [ 1920.159082][ T150] cdns3_gadget_giveback+0x134/0x2c0 [cdns3] [ 1920.164988][ T150] cdns3_transfer_completed+0x438/0x5f8 [cdns3] Add check at line 829, skip call usb_gadget_giveback_request() if it is additional zero length packet request. Needn't call usb_gadget_giveback_request() because it is allocated in this driver. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Reviewed-by: Roger Quadros Acked-by: Peter Chen Link: https://lore.kernel.org/r/20240202154217.661867-2-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index d6723d31fc6e2..fd1beb10bba72 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -828,7 +828,11 @@ void cdns3_gadget_giveback(struct cdns3_endpoint *priv_ep, return; } - if (request->complete) { + /* + * zlp request is appended by driver, needn't call usb_gadget_giveback_request() to notify + * gadget composite driver. + */ + if (request->complete && request->buf != priv_dev->zlp_buf) { spin_unlock(&priv_dev->lock); usb_gadget_giveback_request(&priv_ep->endpoint, request); -- GitLab From b191a18cb5c47109ca696370a74a5062a70adfd0 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Fri, 16 Feb 2024 00:41:02 +0000 Subject: [PATCH 1204/1405] usb: dwc3: gadget: Don't disconnect if not started Don't go through soft-disconnection sequence if the controller hasn't started. Otherwise, there will be timeout and warning reports from the soft-disconnection flow. Cc: stable@vger.kernel.org Fixes: 61a348857e86 ("usb: dwc3: gadget: Fix NULL pointer dereference in dwc3_gadget_suspend") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-usb/20240215233536.7yejlj3zzkl23vjd@synopsys.com/T/#mb0661cd5f9272602af390c18392b9a36da4f96e6 Tested-by: Marek Szyprowski Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e3be9b929934e0680a6f4b8f6eb11b18ae9c7e07.1708043922.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4c8dd67246788..28f49400f3e8b 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2650,6 +2650,11 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc) int ret; spin_lock_irqsave(&dwc->lock, flags); + if (!dwc->pullups_connected) { + spin_unlock_irqrestore(&dwc->lock, flags); + return 0; + } + dwc->connected = false; /* -- GitLab From 858a74cb512833e276d96a72acb560ce8c138bec Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sat, 17 Feb 2024 21:20:42 +0200 Subject: [PATCH 1205/1405] usb: gadget: omap_udc: fix USB gadget regression on Palm TE When upgrading from 6.1 LTS to 6.6 LTS, I noticed the ethernet gadget stopped working on Palm TE. Commit 8825acd7cc8a ("ARM: omap1: remove dead code") deleted Palm TE from machine_without_vbus_sense(), although the board is still used. Fix that. Fixes: 8825acd7cc8a ("ARM: omap1: remove dead code") Cc: stable Signed-off-by: Aaro Koskinen Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240217192042.GA372205@darkstar.musicnaut.iki.fi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/omap_udc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c index 10c5d7f726a1f..f90eeecf27de1 100644 --- a/drivers/usb/gadget/udc/omap_udc.c +++ b/drivers/usb/gadget/udc/omap_udc.c @@ -2036,7 +2036,8 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev) static inline int machine_without_vbus_sense(void) { - return machine_is_omap_osk() || machine_is_sx1(); + return machine_is_omap_osk() || machine_is_omap_palmte() || + machine_is_sx1(); } static int omap_udc_start(struct usb_gadget *g, -- GitLab From 23b1d2d99b0f55326f05e7d757fa197c4a95dc5c Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 17 Feb 2024 17:20:21 +0100 Subject: [PATCH 1206/1405] Revert "usb: typec: tcpm: reset counter when enter into unattached state after try role" The reverted commit makes the state machine only ever go from SRC_ATTACH_WAIT to SNK_TRY in endless loop when toggling. After revert it goes to SRC_ATTACHED after initially trying SNK_TRY earlier, as it should for toggling to ever detect the power source mode and the port is again able to provide power to attached power sinks. This reverts commit 2d6d80127006ae3da26b1f21a65eccf957f2d1e5. Cc: stable@vger.kernel.org Fixes: 2d6d80127006 ("usb: typec: tcpm: reset counter when enter into unattached state after try role") Signed-off-by: Ondrej Jirman Link: https://lore.kernel.org/r/20240217162023.1719738-1-megi@xff.cz Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index f7d7daa60c8dc..295ae7eb912c8 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -3743,9 +3743,6 @@ static void tcpm_detach(struct tcpm_port *port) if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; - port->try_src_count = 0; - port->try_snk_count = 0; - if (!port->attached) return; -- GitLab From 76c51146820c5dac629f21deafab0a7039bc3ccd Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 5 Feb 2024 13:16:50 +0530 Subject: [PATCH 1207/1405] usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is observed sometimes when tethering is used over NCM with Windows 11 as host, at some instances, the gadget_giveback has one byte appended at the end of a proper NTB. When the NTB is parsed, unwrap call looks for any leftover bytes in SKB provided by u_ether and if there are any pending bytes, it treats them as a separate NTB and parses it. But in case the second NTB (as per unwrap call) is faulty/corrupt, all the datagrams that were parsed properly in the first NTB and saved in rx_list are dropped. Adding a few custom traces showed the following: [002] d..1 7828.532866: dwc3_gadget_giveback: ep1out: req 000000003868811a length 1025/16384 zsI ==> 0 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb toprocess: 1025 [002] d..1 7828.532867: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb seq: 0xce67 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x400 [002] d..1 7828.532868: ncm_unwrap_ntb: K: ncm_unwrap_ntb ndp_len: 0x10 [002] d..1 7828.532869: ncm_unwrap_ntb: K: Parsed NTB with 1 frames In this case, the giveback is of 1025 bytes and block length is 1024. The rest 1 byte (which is 0x00) won't be parsed resulting in drop of all datagrams in rx_list. Same is case with packets of size 2048: [002] d..1 7828.557948: dwc3_gadget_giveback: ep1out: req 0000000011dfd96e length 2049/16384 zsI ==> 0 [002] d..1 7828.557949: ncm_unwrap_ntb: K: ncm_unwrap_ntb nth: 1751999342 [002] d..1 7828.557950: ncm_unwrap_ntb: K: ncm_unwrap_ntb blk_len: 0x800 Lecroy shows one byte coming in extra confirming that the byte is coming in from PC: Transfer 2959 - Bytes Transferred(1025) Timestamp((18.524 843 590) - Transaction 8391 - Data(1025 bytes) Timestamp(18.524 843 590) --- Packet 4063861 Data(1024 bytes) Duration(2.117us) Idle(14.700ns) Timestamp(18.524 843 590) --- Packet 4063863 Data(1 byte) Duration(66.160ns) Time(282.000ns) Timestamp(18.524 845 722) According to Windows driver, no ZLP is needed if wBlockLength is non-zero, because the non-zero wBlockLength has already told the function side the size of transfer to be expected. However, there are in-market NCM devices that rely on ZLP as long as the wBlockLength is multiple of wMaxPacketSize. To deal with such devices, it pads an extra 0 at end so the transfer is no longer multiple of wMaxPacketSize. Cc: Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added") Signed-off-by: Krishna Kurapati Reviewed-by: Maciej Żenczykowski Link: https://lore.kernel.org/r/20240205074650.200304-1-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index ca5d5f5649982..e2a059cfda2cd 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1338,7 +1338,15 @@ static int ncm_unwrap_ntb(struct gether *port, "Parsed NTB with %d frames\n", dgram_counter); to_process -= block_len; - if (to_process != 0) { + + /* + * Windows NCM driver avoids USB ZLPs by adding a 1-byte + * zero pad as needed. + */ + if (to_process == 1 && + (*(unsigned char *)(ntb_ptr + block_len) == 0x00)) { + to_process--; + } else if (to_process > 0) { ntb_ptr = (unsigned char *)(ntb_ptr + block_len); goto parse_ntb; } -- GitLab From 84b6238aff3db8f4fa72bc5e451ba34ce288d30a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 15 Feb 2024 12:20:39 -0800 Subject: [PATCH 1208/1405] MAINTAINERS: Drop myself as maintainer of TYPEC port controller drivers I am no longer involved in Type-C development and not really current on its status and progress. Recently I have been doing more damage than good. It is time to go. Cc: Heikki Krogerus Cc: Greg Kroah-Hartman Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20240215202039.1982539-1-linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9ed4d38685394..5cfa6939a1047 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22878,9 +22878,8 @@ S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c USB TYPEC PORT CONTROLLER DRIVERS -M: Guenter Roeck L: linux-usb@vger.kernel.org -S: Maintained +S: Orphan F: drivers/usb/typec/tcpm/ USB UHCI DRIVER -- GitLab From bd915ae73a2d78559b376ad2caf5e4ef51de2455 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 15 Feb 2024 23:04:42 +0100 Subject: [PATCH 1209/1405] drm/meson: Don't remove bridges which are created by other drivers Stop calling drm_bridge_remove() for bridges allocated/managed by other drivers in the remove paths of meson_encoder_{cvbs,dsi,hdmi}. drm_bridge_remove() unregisters the bridge so it cannot be used anymore. Doing so for bridges we don't own can lead to the video pipeline not being able to come up after -EPROBE_DEFER of the VPU because we're unregistering a bridge that's managed by another driver. The other driver doesn't know that we have unregistered it's bridge and on subsequent .probe() we're not able to find those bridges anymore (since nobody re-creates them). This fixes probe errors on Meson8b boards with the CVBS outputs enabled. Fixes: 09847723c12f ("drm/meson: remove drm bridges at aggregate driver unbind time") Fixes: 42dcf15f901c ("drm/meson: add DSI encoder") Cc: Reported-by: Steve Morvai Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Tested-by: Steve Morvai Link: https://lore.kernel.org/r/20240215220442.1343152-1-martin.blumenstingl@googlemail.com Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240215220442.1343152-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_encoder_cvbs.c | 1 - drivers/gpu/drm/meson/meson_encoder_dsi.c | 1 - drivers/gpu/drm/meson/meson_encoder_hdmi.c | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index 3f73b211fa8e3..3407450435e20 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -294,6 +294,5 @@ void meson_encoder_cvbs_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_CVBS]) { meson_encoder_cvbs = priv->encoders[MESON_ENC_CVBS]; drm_bridge_remove(&meson_encoder_cvbs->bridge); - drm_bridge_remove(meson_encoder_cvbs->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_dsi.c b/drivers/gpu/drm/meson/meson_encoder_dsi.c index 3f93c70488cad..311b91630fbe5 100644 --- a/drivers/gpu/drm/meson/meson_encoder_dsi.c +++ b/drivers/gpu/drm/meson/meson_encoder_dsi.c @@ -168,6 +168,5 @@ void meson_encoder_dsi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_DSI]) { meson_encoder_dsi = priv->encoders[MESON_ENC_DSI]; drm_bridge_remove(&meson_encoder_dsi->bridge); - drm_bridge_remove(meson_encoder_dsi->next_bridge); } } diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 25ea765586908..c4686568c9ca5 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -474,6 +474,5 @@ void meson_encoder_hdmi_remove(struct meson_drm *priv) if (priv->encoders[MESON_ENC_HDMI]) { meson_encoder_hdmi = priv->encoders[MESON_ENC_HDMI]; drm_bridge_remove(&meson_encoder_hdmi->bridge); - drm_bridge_remove(meson_encoder_hdmi->next_bridge); } } -- GitLab From 8b79d4e994074a058b6876dce843ee112656258d Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 14 Feb 2024 07:34:30 -0800 Subject: [PATCH 1210/1405] tty: hvc: Don't enable the RISC-V SBI console by default The new SBI console has the same problem as the old one: there's only one shared backing hardware and no synchronization, so the two drivers end up stepping on each other. This was the same issue the old SBI-0.1 console drivers had, but that was disabled by default when SBI-0.1 was. So just mark the new driver as nonportable. Reported-by: Emil Renner Berthing Fixes: 88ead68e764c ("tty: Add SBI debug console support to HVC SBI driver") Signed-off-by: Palmer Dabbelt Reviewed-by: Paul Walmsley Reviewed-by: Anup Patel Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240214153429.16484-2-palmer@rivosinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/hvc/Kconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 6e05c5c7bca1a..c2a4e88b328f3 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -108,13 +108,15 @@ config HVC_DCC_SERIALIZE_SMP config HVC_RISCV_SBI bool "RISC-V SBI console support" - depends on RISCV_SBI + depends on RISCV_SBI && NONPORTABLE select HVC_DRIVER help This enables support for console output via RISC-V SBI calls, which - is normally used only during boot to output printk. + is normally used only during boot to output printk. This driver + conflicts with real console drivers and should not be enabled on + systems that directly access the console. - If you don't know what do to here, say Y. + If you don't know what do to here, say N. config HVCS tristate "IBM Hypervisor Virtual Console Server support" -- GitLab From f418ae73311deb901c0110b08d1bbafc20c1820e Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:07 +0100 Subject: [PATCH 1211/1405] serial: stm32: do not always set SER_RS485_RX_DURING_TX if RS485 is enabled Before commit 07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO") the SER_RS485_RX_DURING_TX flag was only set if the rx-during-tx mode was not controlled by a GPIO. Now the flag is set unconditionally when RS485 is enabled. This results in an incorrect setting if the rx-during-tx GPIO is not asserted. Fix this by setting the flag only if the rx-during-tx mode is not controlled by a GPIO and thus restore the correct behaviour. Cc: stable@vger.kernel.org # 6.6+ Fixes: 07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-1-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 794b775127403..693e932d6feb5 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter writel_relaxed(cr3, port->membase + ofs->cr3); writel_relaxed(cr1, port->membase + ofs->cr1); - rs485conf->flags |= SER_RS485_RX_DURING_TX; + if (!port->rs485_rx_during_tx_gpio) + rs485conf->flags |= SER_RS485_RX_DURING_TX; + } else { stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DEM | USART_CR3_DEP); -- GitLab From 3b69e32e151bc4a4e3c785cbdb1f918d5ee337ed Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Fri, 16 Feb 2024 23:47:08 +0100 Subject: [PATCH 1212/1405] serial: amba-pl011: Fix DMA transmission in RS485 mode When DMA is used in RS485 mode make sure that the UARTs tx section is enabled before the DMA buffers are queued for transmission. Cc: stable@vger.kernel.org Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20240216224709.9928-2-l.sanfilippo@kunbus.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index fccec1698a541..cf2c890a560f0 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1339,11 +1339,41 @@ static void pl011_start_tx_pio(struct uart_amba_port *uap) } } +static void pl011_rs485_tx_start(struct uart_amba_port *uap) +{ + struct uart_port *port = &uap->port; + u32 cr; + + /* Enable transmitter */ + cr = pl011_read(uap, REG_CR); + cr |= UART011_CR_TXE; + + /* Disable receiver if half-duplex */ + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + cr &= ~UART011_CR_RXE; + + if (port->rs485.flags & SER_RS485_RTS_ON_SEND) + cr &= ~UART011_CR_RTS; + else + cr |= UART011_CR_RTS; + + pl011_write(cr, uap, REG_CR); + + if (port->rs485.delay_rts_before_send) + mdelay(port->rs485.delay_rts_before_send); + + uap->rs485_tx_started = true; +} + static void pl011_start_tx(struct uart_port *port) { struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (!pl011_dma_tx_start(uap)) pl011_start_tx_pio(uap); } @@ -1424,42 +1454,12 @@ static bool pl011_tx_char(struct uart_amba_port *uap, unsigned char c, return true; } -static void pl011_rs485_tx_start(struct uart_amba_port *uap) -{ - struct uart_port *port = &uap->port; - u32 cr; - - /* Enable transmitter */ - cr = pl011_read(uap, REG_CR); - cr |= UART011_CR_TXE; - - /* Disable receiver if half-duplex */ - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - cr &= ~UART011_CR_RXE; - - if (port->rs485.flags & SER_RS485_RTS_ON_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - - pl011_write(cr, uap, REG_CR); - - if (port->rs485.delay_rts_before_send) - mdelay(port->rs485.delay_rts_before_send); - - uap->rs485_tx_started = true; -} - /* Returns true if tx interrupts have to be (kept) enabled */ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) { struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; -- GitLab From cd65c48d66920457129584553f217005d09b1edb Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 15 Feb 2024 10:33:25 +0800 Subject: [PATCH 1213/1405] selftests: bonding: set active slave to primary eth1 specifically In bond priority testing, we set the primary interface to eth1 and add eth0,1,2 to bond in serial. This is OK in normal times. But when in debug kernel, the bridge port that eth0,1,2 connected would start slowly (enter blocking, forwarding state), which caused the primary interface down for a while after enslaving and active slave changed. Here is a test log from Jakub's debug test[1]. [ 400.399070][ T50] br0: port 1(s0) entered disabled state [ 400.400168][ T50] br0: port 4(s2) entered disabled state [ 400.941504][ T2791] bond0: (slave eth0): making interface the new active one [ 400.942603][ T2791] bond0: (slave eth0): Enslaving as an active interface with an up link [ 400.943633][ T2766] br0: port 1(s0) entered blocking state [ 400.944119][ T2766] br0: port 1(s0) entered forwarding state [ 401.128792][ T2792] bond0: (slave eth1): making interface the new active one [ 401.130771][ T2792] bond0: (slave eth1): Enslaving as an active interface with an up link [ 401.131643][ T69] br0: port 2(s1) entered blocking state [ 401.132067][ T69] br0: port 2(s1) entered forwarding state [ 401.346201][ T2793] bond0: (slave eth2): Enslaving as a backup interface with an up link [ 401.348414][ T50] br0: port 4(s2) entered blocking state [ 401.348857][ T50] br0: port 4(s2) entered forwarding state [ 401.519669][ T250] bond0: (slave eth0): link status definitely down, disabling slave [ 401.526522][ T250] bond0: (slave eth1): link status definitely down, disabling slave [ 401.526986][ T250] bond0: (slave eth2): making interface the new active one [ 401.629470][ T250] bond0: (slave eth0): link status definitely up [ 401.630089][ T250] bond0: (slave eth1): link status definitely up [...] # TEST: prio (active-backup ns_ip6_target primary_reselect 1) [FAIL] # Current active slave is eth2 but not eth1 Fix it by setting active slave to primary slave specifically before testing. [1] https://netdev-3.bots.linux.dev/vmksft-bonding-dbg/results/464301/1-bond-options-sh/stdout Fixes: 481b56e0391e ("selftests: bonding: re-format bond option tests") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index d508486cc0bdc..9a3d3c389dadd 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -62,6 +62,8 @@ prio_test() # create bond bond_reset "${param}" + # set active_slave to primary eth1 specifically + ip -n ${s_ns} link set bond0 type bond active_slave eth1 # check bonding member prio value ip -n ${s_ns} link set eth0 type bond_slave prio 0 -- GitLab From 9815e39617541ef52d0dfac4be274ad378c6dc09 Mon Sep 17 00:00:00 2001 From: "Andrey Jr. Melnikov" Date: Wed, 14 Feb 2024 17:57:57 +0100 Subject: [PATCH 1214/1405] ahci: asm1064: correct count of reported ports The ASM1064 SATA host controller always reports wrongly, that it has 24 ports. But in reality, it only has four ports. before: ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xffff0f impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst after: ahci 0000:04:00.0: ASM1064 has only four ports ahci 0000:04:00.0: forcing port_map 0xffff0f -> 0xf ahci 0000:04:00.0: SSS flag set, parallel bus scan disabled ahci 0000:04:00.0: AHCI 0001.0301 32 slots 24 ports 6 Gbps 0xf impl SATA mode ahci 0000:04:00.0: flags: 64bit ncq sntf stag pm led only pio sxs deso sadm sds apst Signed-off-by: "Andrey Jr. Melnikov" Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index da2e74fce2d99..682ff550ccfb9 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -671,9 +671,17 @@ MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets"); static void ahci_pci_save_initial_config(struct pci_dev *pdev, struct ahci_host_priv *hpriv) { - if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == 0x1166) { - dev_info(&pdev->dev, "ASM1166 has only six ports\n"); - hpriv->saved_port_map = 0x3f; + if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA) { + switch (pdev->device) { + case 0x1166: + dev_info(&pdev->dev, "ASM1166 has only six ports\n"); + hpriv->saved_port_map = 0x3f; + break; + case 0x1064: + dev_info(&pdev->dev, "ASM1064 has only four ports\n"); + hpriv->saved_port_map = 0xf; + break; + } } if (pdev->vendor == PCI_VENDOR_ID_JMICRON && pdev->device == 0x2361) { -- GitLab From 26c8404e162b43dddcb037ba2d0cb58c0ed60aab Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Fri, 16 Feb 2024 23:44:57 +0530 Subject: [PATCH 1215/1405] ata: ahci_ceva: fix error handling for Xilinx GT PHY support Platform clock and phy error resources are not cleaned up in Xilinx GT PHY error path. To fix introduce the function ceva_ahci_platform_enable_resources() which is a customized version of ahci_platform_enable_resources() and inline with SATA IP programming sequence it does: - Assert SATA reset - Program PS GTR phy - Bring SATA by de-asserting the reset - Wait for GT lane PLL to be locked ceva_ahci_platform_enable_resources() is also used in the resume path as the same SATA programming sequence (as in probe) should be followed. Also cleanup the mixed usage of ahci_platform_enable_resources() and custom implementation in the probe function as both are not required. Fixes: 9a9d3abe24bb ("ata: ahci: ceva: Update the driver to support xilinx GT phy") Signed-off-by: Radhey Shyam Pandey Reviewed-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/ahci_ceva.c | 125 +++++++++++++++++++++++++--------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 64f7f7d6ba84e..11a2c199a7c24 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -88,7 +88,6 @@ struct ceva_ahci_priv { u32 axicc; bool is_cci_enabled; int flags; - struct reset_control *rst; }; static unsigned int ceva_ahci_read_id(struct ata_device *dev, @@ -189,6 +188,60 @@ static const struct scsi_host_template ahci_platform_sht = { AHCI_SHT(DRV_NAME), }; +static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc, i; + + rc = ahci_platform_enable_regulators(hpriv); + if (rc) + return rc; + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + /* Assert the controller reset */ + rc = ahci_platform_assert_rsts(hpriv); + if (rc) + goto disable_clks; + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_init(hpriv->phys[i]); + if (rc) + goto disable_rsts; + } + + /* De-assert the controller reset */ + ahci_platform_deassert_rsts(hpriv); + + for (i = 0; i < hpriv->nports; i++) { + rc = phy_power_on(hpriv->phys[i]); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + } + + return 0; + +disable_rsts: + ahci_platform_deassert_rsts(hpriv); + +disable_phys: + while (--i >= 0) { + phy_power_off(hpriv->phys[i]); + phy_exit(hpriv->phys[i]); + } + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + ahci_platform_disable_regulators(hpriv); + + return rc; +} + static int ceva_ahci_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -203,47 +256,19 @@ static int ceva_ahci_probe(struct platform_device *pdev) return -ENOMEM; cevapriv->ahci_pdev = pdev; - - cevapriv->rst = devm_reset_control_get_optional_exclusive(&pdev->dev, - NULL); - if (IS_ERR(cevapriv->rst)) - dev_err_probe(&pdev->dev, PTR_ERR(cevapriv->rst), - "failed to get reset\n"); - hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); - if (!cevapriv->rst) { - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - } else { - int i; + hpriv->rsts = devm_reset_control_get_optional_exclusive(&pdev->dev, + NULL); + if (IS_ERR(hpriv->rsts)) + return dev_err_probe(&pdev->dev, PTR_ERR(hpriv->rsts), + "failed to get reset\n"); - rc = ahci_platform_enable_clks(hpriv); - if (rc) - return rc; - /* Assert the controller reset */ - reset_control_assert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_init(hpriv->phys[i]); - if (rc) - return rc; - } - - /* De-assert the controller reset */ - reset_control_deassert(cevapriv->rst); - - for (i = 0; i < hpriv->nports; i++) { - rc = phy_power_on(hpriv->phys[i]); - if (rc) { - phy_exit(hpriv->phys[i]); - return rc; - } - } - } + rc = ceva_ahci_platform_enable_resources(hpriv); + if (rc) + return rc; if (of_property_read_bool(np, "ceva,broken-gen2")) cevapriv->flags = CEVA_FLAG_BROKEN_GEN2; @@ -252,52 +277,60 @@ static int ceva_ahci_probe(struct platform_device *pdev) if (of_property_read_u8_array(np, "ceva,p0-cominit-params", (u8 *)&cevapriv->pp2c[0], 4) < 0) { dev_warn(dev, "ceva,p0-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-cominit-params", (u8 *)&cevapriv->pp2c[1], 4) < 0) { dev_warn(dev, "ceva,p1-cominit-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read OOB timing value for COMWAKE from device-tree*/ if (of_property_read_u8_array(np, "ceva,p0-comwake-params", (u8 *)&cevapriv->pp3c[0], 4) < 0) { dev_warn(dev, "ceva,p0-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-comwake-params", (u8 *)&cevapriv->pp3c[1], 4) < 0) { dev_warn(dev, "ceva,p1-comwake-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy BURST timing value from device-tree */ if (of_property_read_u8_array(np, "ceva,p0-burst-params", (u8 *)&cevapriv->pp4c[0], 4) < 0) { dev_warn(dev, "ceva,p0-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u8_array(np, "ceva,p1-burst-params", (u8 *)&cevapriv->pp4c[1], 4) < 0) { dev_warn(dev, "ceva,p1-burst-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* Read phy RETRY interval timing value from device-tree */ if (of_property_read_u16_array(np, "ceva,p0-retry-params", (u16 *)&cevapriv->pp5c[0], 2) < 0) { dev_warn(dev, "ceva,p0-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } if (of_property_read_u16_array(np, "ceva,p1-retry-params", (u16 *)&cevapriv->pp5c[1], 2) < 0) { dev_warn(dev, "ceva,p1-retry-params property not defined\n"); - return -EINVAL; + rc = -EINVAL; + goto disable_resources; } /* @@ -335,7 +368,7 @@ static int __maybe_unused ceva_ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - rc = ahci_platform_enable_resources(hpriv); + rc = ceva_ahci_platform_enable_resources(hpriv); if (rc) return rc; -- GitLab From 335126937753844d36036984e96a8f343538a778 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 15 Feb 2024 17:44:32 +0000 Subject: [PATCH 1216/1405] drm/tests/drm_buddy: fix 32b build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doesn't seem to compile on 32b, presumably due to u64 mod/division. Simplest is to just switch over to u32 here. Also make print modifiers consistent with that. Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") Reported-by: Geert Uytterhoeven Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240215174431.285069-7-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index fee6bec757d1a..edacc1adb28fa 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -21,7 +21,7 @@ static inline u64 get_size(int order, u64 chunk_size) static void drm_test_buddy_alloc_contiguous(struct kunit *test) { - u64 mm_size, ps = SZ_4K, i, n_pages, total; + u32 mm_size, ps = SZ_4K, i, n_pages, total; struct drm_buddy_block *block; struct drm_buddy mm; LIST_HEAD(left); @@ -56,30 +56,30 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, ps, ps, list, 0), - "buddy_alloc hit an error size=%d\n", + "buddy_alloc hit an error size=%u\n", ps); } while (++i < n_pages); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%d\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); drm_buddy_free_list(&mm, &middle); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 2 * ps); + "buddy_alloc didn't error size=%u\n", 2 * ps); drm_buddy_free_list(&mm, &right); KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc didn't error size=%llu\n", 3 * ps); + "buddy_alloc didn't error size=%u\n", 3 * ps); /* * At this point we should have enough contiguous space for 2 blocks, * however they are never buddies (since we freed middle and right) so @@ -88,13 +88,13 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 2 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%d\n", 2 * ps); + "buddy_alloc hit an error size=%u\n", 2 * ps); drm_buddy_free_list(&mm, &left); KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, 3 * ps, ps, &allocated, DRM_BUDDY_CONTIGUOUS_ALLOCATION), - "buddy_alloc hit an error size=%d\n", 3 * ps); + "buddy_alloc hit an error size=%u\n", 3 * ps); total = 0; list_for_each_entry(block, &allocated, link) -- GitLab From e42b9d8b9ea2672811285e6a7654887ff64d23f3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 7 Feb 2024 10:00:42 +1030 Subject: [PATCH 1217/1405] btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size [BUG] With the following file extent layout, defrag would do unnecessary IO and result more on-disk space usage. # mkfs.btrfs -f $dev # mount $dev $mnt # xfs_io -f -c "pwrite 0 40m" $mnt/foobar # sync # xfs_io -f -c "pwrite 40m 16k" $mnt/foobar # sync Above command would lead to the following file extent layout: item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 41943040 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 41943040) itemoff 15763 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Which is mostly fine. We can allow the final 16K to be merged with the previous 40M, but it's upon the end users' preference. But if we defrag the file using the default parameters, it would result worse file layout: # btrfs filesystem defrag $mnt/foobar # sync item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 8650752 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 8650752) itemoff 15763 itemsize 53 generation 9 type 1 (regular) extent data disk byte 340787200 nr 33292288 extent data offset 0 nr 33292288 ram 33292288 extent compression 0 (none) item 8 key (257 EXTENT_DATA 41943040) itemoff 15710 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Note the original 40M extent is still there, but a new 32M extent is created for no benefit at all. [CAUSE] There is an existing check to make sure we won't defrag a large enough extent (the threshold is by default 32M). But the check is using the length to the end of the extent: range_len = em->len - (cur - em->start); /* Skip too large extent */ if (range_len >= extent_thresh) goto next; This means, for the first 8MiB of the extent, the range_len is always smaller than the default threshold, and would not be defragged. But after the first 8MiB, the remaining part would fit the requirement, and be defragged. Such different behavior inside the same extent caused the above problem, and we should avoid different defrag decision inside the same extent. [FIX] Instead of using @range_len, just use @em->len, so that we have a consistent decision among the same file extent. Now with this fix, we won't touch the extent, thus not making it any worse. Reported-by: Filipe Manana Fixes: 0cb5950f3f3b ("btrfs: fix deadlock when reserving space during defrag") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Boris Burkov Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/defrag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index c276b136ab63a..5b0b645714183 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (range_len >= extent_thresh) + if (em->len >= extent_thresh) goto next; /* -- GitLab From b0ad381fa7690244802aed119b478b4bdafc31dd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 12 Feb 2024 11:56:02 -0500 Subject: [PATCH 1218/1405] btrfs: fix deadlock with fiemap and extent locking While working on the patchset to remove extent locking I got a lockdep splat with fiemap and pagefaulting with my new extent lock replacement lock. This deadlock exists with our normal code, we just don't have lockdep annotations with the extent locking so we've never noticed it. Since we're copying the fiemap extent to user space on every iteration we have the chance of pagefaulting. Because we hold the extent lock for the entire range we could mkwrite into a range in the file that we have mmap'ed. This would deadlock with the following stack trace [<0>] lock_extent+0x28d/0x2f0 [<0>] btrfs_page_mkwrite+0x273/0x8a0 [<0>] do_page_mkwrite+0x50/0xb0 [<0>] do_fault+0xc1/0x7b0 [<0>] __handle_mm_fault+0x2fa/0x460 [<0>] handle_mm_fault+0xa4/0x330 [<0>] do_user_addr_fault+0x1f4/0x800 [<0>] exc_page_fault+0x7c/0x1e0 [<0>] asm_exc_page_fault+0x26/0x30 [<0>] rep_movs_alternative+0x33/0x70 [<0>] _copy_to_user+0x49/0x70 [<0>] fiemap_fill_next_extent+0xc8/0x120 [<0>] emit_fiemap_extent+0x4d/0xa0 [<0>] extent_fiemap+0x7f8/0xad0 [<0>] btrfs_fiemap+0x49/0x80 [<0>] __x64_sys_ioctl+0x3e1/0xb50 [<0>] do_syscall_64+0x94/0x1a0 [<0>] entry_SYSCALL_64_after_hwframe+0x6e/0x76 I wrote an fstest to reproduce this deadlock without my replacement lock and verified that the deadlock exists with our existing locking. To fix this simply don't take the extent lock for the entire duration of the fiemap. This is safe in general because we keep track of where we are when we're searching the tree, so if an ordered extent updates in the middle of our fiemap call we'll still emit the correct extents because we know what offset we were on before. The only place we maintain the lock is searching delalloc. Since the delalloc stuff can change during writeback we want to lock the extent range so we have a consistent view of delalloc at the time we're checking to see if we need to set the delalloc flag. With this patch applied we no longer deadlock with my testcase. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 62 ++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a0ffd41c5cc19..61d961a30dee2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode, * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; + u64 lockstart; + u64 lockend; u64 prealloc_len = 0; bool delalloc; + lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); + lockend = round_up(end, inode->root->fs_info->sectorsize); + + /* + * We are only locking for the delalloc range because that's the + * only thing that can change here. With fiemap we have a lock + * on the inode, so no buffered or direct writes can happen. + * + * However mmaps and normal page writeback will cause this to + * change arbitrarily. We have to lock the extent lock here to + * make sure that nobody messes with the tree while we're doing + * btrfs_find_delalloc_in_range. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) break; @@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); - struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; u64 last_extent_end; u64 prev_extent_end; - u64 lockstart; - u64 lockend; + u64 range_start; + u64 range_end; + const u64 sectorsize = inode->root->fs_info->sectorsize; bool stopped = false; int ret; @@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - lockstart = round_down(start, inode->root->fs_info->sectorsize); - lockend = round_up(start + len, inode->root->fs_info->sectorsize); - prev_extent_end = lockstart; + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) @@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, btrfs_release_path(path); path->reada = READA_FORWARD; - ret = fiemap_search_slot(inode, path, lockstart); + ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto check_eof_delalloc; } - while (prev_extent_end < lockend) { + while (prev_extent_end < range_end) { struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *ei; struct btrfs_key key; @@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, * The first iteration can leave us at an extent item that ends * before our range's start. Move to the next item. */ - if (extent_end <= lockstart) + if (extent_end <= range_start) goto next_item; backref_ctx->curr_leaf_bytenr = leaf->start; /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { - const u64 range_end = min(key.offset, lockend) - 1; + const u64 hole_end = min(key.offset, range_end) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, - prev_extent_end, range_end); + prev_extent_end, hole_end); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } /* We've reached the end of the fiemap range, stop. */ - if (key.offset >= lockend) { + if (key.offset >= range_end) { stopped = true; break; } @@ -3049,29 +3066,41 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, btrfs_free_path(path); path = NULL; - if (!stopped && prev_extent_end < lockend) { + if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, - 0, 0, 0, prev_extent_end, lockend - 1); + 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) goto out_unlock; - prev_extent_end = lockend; + prev_extent_end = range_end; } if (cache.cached && cache.offset + cache.len >= last_extent_end) { const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; + u64 lockstart; + u64 lockend; bool delalloc; + lockstart = round_down(prev_extent_end, sectorsize); + lockend = round_up(i_size, sectorsize); + + /* + * See the comment in fiemap_process_hole as to why + * we're doing the locking here. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { @@ -3082,7 +3111,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, ret = emit_last_fiemap_cache(fieinfo, &cache); out_unlock: - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); -- GitLab From 121e4dcba3700b30e63f25203d09ddfccbab4a09 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 16 Feb 2024 14:52:59 -0800 Subject: [PATCH 1219/1405] ionic: use pci_is_enabled not open code Since there is a utility available for this, use the API rather than open code. Fixes: 13943d6c8273 ("ionic: prevent pci disable of already disabled device") Reviewed-by: Brett Creeley Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index c49aa358e4244..10a9d80db32cb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -223,7 +223,7 @@ static void ionic_clear_pci(struct ionic *ionic) ionic_unmap_bars(ionic); pci_release_regions(ionic->pdev); - if (atomic_read(&ionic->pdev->enable_cnt) > 0) + if (pci_is_enabled(ionic->pdev)) pci_disable_device(ionic->pdev); } -- GitLab From 40b9385dd8e6a0515e1c9cd06a277483556b7286 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 15:30:05 -0800 Subject: [PATCH 1220/1405] enic: Avoid false positive under FORTIFY_SOURCE FORTIFY_SOURCE has been ignoring 0-sized destinations while the kernel code base has been converted to flexible arrays. In order to enforce the 0-sized destinations (e.g. with __counted_by), the remaining 0-sized destinations need to be handled. Unfortunately, struct vic_provinfo resists full conversion, as it contains a flexible array of flexible arrays, which is only possible with the 0-sized fake flexible array. Use unsafe_memcpy() to avoid future false positives under CONFIG_FORTIFY_SOURCE. Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/vnic_vic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_vic.c b/drivers/net/ethernet/cisco/enic/vnic_vic.c index 20fcb20b42ede..66b5778353389 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_vic.c +++ b/drivers/net/ethernet/cisco/enic/vnic_vic.c @@ -49,7 +49,8 @@ int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length, tlv->type = htons(type); tlv->length = htons(length); - memcpy(tlv->value, value, length); + unsafe_memcpy(tlv->value, value, length, + /* Flexible array of flexible arrays */); vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1); vp->length = htonl(ntohl(vp->length) + -- GitLab From 0281b919e175bb9c3128bd3872ac2903e9436e3f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Feb 2024 13:12:17 -0800 Subject: [PATCH 1221/1405] bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel The following race is possible between bpf_timer_cancel_and_free and bpf_timer_cancel. It will lead a UAF on the timer->timer. bpf_timer_cancel(); spin_lock(); t = timer->time; spin_unlock(); bpf_timer_cancel_and_free(); spin_lock(); t = timer->timer; timer->timer = NULL; spin_unlock(); hrtimer_cancel(&t->timer); kfree(t); /* UAF on t */ hrtimer_cancel(&t->timer); In bpf_timer_cancel_and_free, this patch frees the timer->timer after a rcu grace period. This requires a rcu_head addition to the "struct bpf_hrtimer". Another kfree(t) happens in bpf_timer_init, this does not need a kfree_rcu because it is still under the spin_lock and timer->timer has not been visible by others yet. In bpf_timer_cancel, rcu_read_lock() is added because this helper can be used in a non rcu critical section context (e.g. from a sleepable bpf prog). Other timer->timer usages in helpers.c have been audited, bpf_timer_cancel() is the only place where timer->timer is used outside of the spin_lock. Another solution considered is to mark a t->flag in bpf_timer_cancel and clear it after hrtimer_cancel() is done. In bpf_timer_cancel_and_free, it busy waits for the flag to be cleared before kfree(t). This patch goes with a straight forward solution and frees timer->timer after a rcu grace period. Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Alexei Starovoitov Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240215211218.990808-1-martin.lau@linux.dev --- kernel/bpf/helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index be72824f32b2c..d19cd863d294e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1101,6 +1101,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1353,6 +1355,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1407,7 +1410,7 @@ void bpf_timer_cancel_and_free(void *val) */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) -- GitLab From 3f00e4a9c96f4488a924aff4e35b77c8eced897e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 15 Feb 2024 13:12:18 -0800 Subject: [PATCH 1222/1405] selftests/bpf: Test racing between bpf_timer_cancel_and_free and bpf_timer_cancel This selftest is based on a Alexei's test adopted from an internal user to troubleshoot another bug. During this exercise, a separate racing bug was discovered between bpf_timer_cancel_and_free and bpf_timer_cancel. The details can be found in the previous patch. This patch is to add a selftest that can trigger the bug. I can trigger the UAF everytime in my qemu setup with KASAN. The idea is to have multiple user space threads running in a tight loop to exercise both bpf_map_update_elem (which calls into bpf_timer_cancel_and_free) and bpf_timer_cancel. Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20240215211218.990808-2-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/timer.c | 35 ++++++++++++++++++- tools/testing/selftests/bpf/progs/timer.c | 34 +++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 760ad96b4be09..d66687f1ee6a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -4,10 +4,29 @@ #include "timer.skel.h" #include "timer_failure.skel.h" +#define NUM_THR 8 + +static void *spin_lock_thread(void *arg) +{ + int i, err, prog_fd = *(int *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + for (i = 0; i < 10000; i++) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err") || + !ASSERT_OK(topts.retval, "test_run_opts retval")) + break; + } + + pthread_exit(arg); +} + static int timer(struct timer *timer_skel) { - int err, prog_fd; + int i, err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts); + pthread_t thread_id[NUM_THR]; + void *ret; err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel) /* check that code paths completed */ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + prog_fd = bpf_program__fd(timer_skel->progs.race); + for (i = 0; i < NUM_THR; i++) { + err = pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + while (i) { + err = pthread_join(thread_id[--i], &ret); + if (ASSERT_OK(err, "pthread_join")) + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); + } + return 0; } diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 8b946c8188c65..f615da97df263 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,8 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"), + race_array SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a) return 0; } + +static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer) +{ + bpf_timer_start(timer, 1000000, 0); + return 0; +} + +SEC("syscall") +int race(void *ctx) +{ + struct bpf_timer *timer; + int err, race_key = 0; + struct elem init; + + __builtin_memset(&init, 0, sizeof(struct elem)); + bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY); + + timer = bpf_map_lookup_elem(&race_array, &race_key); + if (!timer) + return 1; + + err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC); + if (err && err != -EBUSY) + return 1; + + bpf_timer_set_callback(timer, race_timer_callback); + bpf_timer_start(timer, 0, 0); + bpf_timer_cancel(timer); + + return 0; +} -- GitLab From 5f2ae606cb5a90839a9be9d22388c4200f820e75 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 17 Feb 2024 19:41:51 +0800 Subject: [PATCH 1223/1405] bpf: Fix an issue due to uninitialized bpf_iter_task Failure to initialize it->pos, coupled with the presence of an invalid value in the flags variable, can lead to it->pos referencing an invalid task, potentially resulting in a kernel panic. To mitigate this risk, it's crucial to ensure proper initialization of it->pos to NULL. Fixes: ac8148d957f5 ("bpf: bpf_iter_task_next: use next_task(kit->task) rather than next_task(kit->pos)") Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Oleg Nesterov Link: https://lore.kernel.org/bpf/20240217114152.1623-2-laoar.shao@gmail.com --- kernel/bpf/task_iter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index e5c3500443c6e..ec4e97c61eefe 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != __alignof__(struct bpf_iter_task)); + kit->pos = NULL; + switch (flags) { case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_PROCS: -- GitLab From 5c138a8a4abe152fcbef1ed40a6a4b5727b2991b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 17 Feb 2024 19:41:52 +0800 Subject: [PATCH 1224/1405] selftests/bpf: Add negtive test cases for task iter Incorporate a test case to assess the handling of invalid flags or task__nullable parameters passed to bpf_iter_task_new(). Prior to the preceding commit, this scenario could potentially trigger a kernel panic. However, with the previous commit, this test case is expected to function correctly. Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240217114152.1623-3-laoar.shao@gmail.com --- tools/testing/selftests/bpf/prog_tests/iters.c | 1 + tools/testing/selftests/bpf/progs/iters_task.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index bf84d4a1d9ae2..3c440370c1f0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -193,6 +193,7 @@ static void subtest_task_iters(void) ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c index c9b4055cd410a..e4d53e40ff208 100644 --- a/tools/testing/selftests/bpf/progs/iters_task.c +++ b/tools/testing/selftests/bpf/progs/iters_task.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; pid_t target_pid; -int procs_cnt, threads_cnt, proc_threads_cnt; +int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx) procs_cnt = threads_cnt = proc_threads_cnt = 0; bpf_rcu_read_lock(); + bpf_for_each(task, pos, NULL, ~0U) { + /* Below instructions shouldn't be executed for invalid flags */ + invalid_cnt++; + } + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) { + /* Below instructions shouldn't be executed for invalid task__nullable */ + invalid_cnt++; + } + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) if (pos->pid == target_pid) procs_cnt++; -- GitLab From 6f7d0f5fd8e440c3446560100ac4ff9a55eec340 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Fri, 9 Feb 2024 10:23:47 -0500 Subject: [PATCH 1225/1405] platform/x86: think-lmi: Fix password opcode ordering for workstations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Lenovo workstations require the password opcode to be run before the attribute value is changed (if Admin password is enabled). Tested on some Thinkpads to confirm they are OK with this order too. Signed-off-by: Mark Pearson Fixes: 640a5fa50a42 ("platform/x86: think-lmi: Opcode support") Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20240209152359.528919-1-mpearson-lenovo@squebb.ca Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3a396b763c496..ce3e08815a8e6 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1009,7 +1009,16 @@ static ssize_t current_value_store(struct kobject *kobj, * Note - this sets the variable and then the password as separate * WMI calls. Function tlmi_save_bios_settings will error if the * password is incorrect. + * Workstation's require the opcode to be set before changing the + * attribute. */ + if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { + ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin", + tlmi_priv.pwd_admin->password); + if (ret) + goto out; + } + set_str = kasprintf(GFP_KERNEL, "%s,%s;", setting->display_name, new_setting); if (!set_str) { @@ -1021,17 +1030,10 @@ static ssize_t current_value_store(struct kobject *kobj, if (ret) goto out; - if (tlmi_priv.save_mode == TLMI_SAVE_BULK) { + if (tlmi_priv.save_mode == TLMI_SAVE_BULK) tlmi_priv.save_required = true; - } else { - if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { - ret = tlmi_opcode_setting("WmiOpcodePasswordAdmin", - tlmi_priv.pwd_admin->password); - if (ret) - goto out; - } + else ret = tlmi_save_bios_settings(""); - } } else { /* old non-opcode based authentication method (deprecated) */ if (tlmi_priv.pwd_admin->valid && tlmi_priv.pwd_admin->password[0]) { auth_str = kasprintf(GFP_KERNEL, "%s,%s,%s;", -- GitLab From 8f812373d195810c68e1beeabe2317f8c6a31012 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 10 Feb 2024 12:01:49 +0100 Subject: [PATCH 1226/1405] platform/x86: intel: int0002_vgpio: Pass IRQF_ONESHOT to request_irq() Since commit 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") the ACPI OSL code passes IRQF_ONESHOT when requesting the SCI. Since the INT0002 GPIO is typically shared with the ACPI SCI the INT0002 driver must pass the same flags. This fixes the INT0002 driver failing to probe due to following error + as well as removing the backtrace that follows this error: "genirq: Flags mismatch irq 9. 00000084 (INT0002) vs. 00002080 (acpi)" Fixes: 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240210110149.12803-1-hdegoede@redhat.com --- drivers/platform/x86/intel/int0002_vgpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c index b6708bab7c53d..527d8fbc7cc11 100644 --- a/drivers/platform/x86/intel/int0002_vgpio.c +++ b/drivers/platform/x86/intel/int0002_vgpio.c @@ -196,7 +196,7 @@ static int int0002_probe(struct platform_device *pdev) * IRQs into gpiolib. */ ret = devm_request_irq(dev, irq, int0002_irq, - IRQF_SHARED, "INT0002", chip); + IRQF_ONESHOT | IRQF_SHARED, "INT0002", chip); if (ret) { dev_err(dev, "Error requesting IRQ %d: %d\n", irq, ret); return ret; -- GitLab From dbcbfd662a725641d118fb3ae5ffb7be4e3d0fb0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2024 13:06:07 +0100 Subject: [PATCH 1227/1405] platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names On some devices the ACPI name of the touchscreen is e.g. either MSSL1680:00 or MSSL1680:01 depending on the BIOS version. This happens for example on the "Chuwi Hi8 Air" tablet where the initial commit's ts_data uses "MSSL1680:00" but the tablets from the github issue and linux-hardware.org probe linked below both use "MSSL1680:01". Replace the strcmp() match on ts_data->acpi_name with a strstarts() check to allow using a partial match on just the ACPI HID of "MSSL1680" and change the ts_data->acpi_name for the "Chuwi Hi8 Air" accordingly to fix the touchscreen not working on models where it is "MSSL1680:01". Note this drops the length check for I2C_NAME_SIZE. This never was necessary since the ACPI names used are never more then 11 chars and I2C_NAME_SIZE is 20 so the replaced strncmp() would always stop long before reaching I2C_NAME_SIZE. Link: https://linux-hardware.org/?computer=AC4301C0542A Fixes: bbb97d728f77 ("platform/x86: touchscreen_dmi: Add info for the Chuwi Hi8 Air tablet") Closes: https://github.com/onitake/gsl-firmware/issues/91 Cc: stable@vger.kernel.org Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240212120608.30469-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 7aee5e9ff2b8d..969477c83e56e 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -81,7 +81,7 @@ static const struct property_entry chuwi_hi8_air_props[] = { }; static const struct ts_dmi_data chuwi_hi8_air_data = { - .acpi_name = "MSSL1680:00", + .acpi_name = "MSSL1680", .properties = chuwi_hi8_air_props, }; @@ -1821,7 +1821,7 @@ static void ts_dmi_add_props(struct i2c_client *client) int error; if (has_acpi_companion(dev) && - !strncmp(ts_data->acpi_name, client->name, I2C_NAME_SIZE)) { + strstarts(client->name, ts_data->acpi_name)) { error = device_create_managed_software_node(dev, ts_data->properties, NULL); if (error) dev_err(dev, "failed to add properties: %d\n", error); -- GitLab From f0ddb8a9021305e4e4e4179a2e47ecea6170d55d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 12 Feb 2024 13:06:08 +0100 Subject: [PATCH 1228/1405] platform/x86: touchscreen_dmi: Consolidate Goodix upside-down touchscreen data Now that prefix matches for ACPI names are supported, the ts_dmi_data structs for "GDIX1001:00" and "GDIX1001:01" can be consolidated into a single match matching on "GDIX1001". For consistency also change gdix1002_00_upside_down_data to match. Reviewed-by: Kuppuswamy Sathyanarayanan Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240212120608.30469-2-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 969477c83e56e..975cf24ae359a 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -415,18 +415,13 @@ static const struct property_entry gdix1001_upside_down_props[] = { { } }; -static const struct ts_dmi_data gdix1001_00_upside_down_data = { - .acpi_name = "GDIX1001:00", - .properties = gdix1001_upside_down_props, -}; - -static const struct ts_dmi_data gdix1001_01_upside_down_data = { - .acpi_name = "GDIX1001:01", +static const struct ts_dmi_data gdix1001_upside_down_data = { + .acpi_name = "GDIX1001", .properties = gdix1001_upside_down_props, }; -static const struct ts_dmi_data gdix1002_00_upside_down_data = { - .acpi_name = "GDIX1002:00", +static const struct ts_dmi_data gdix1002_upside_down_data = { + .acpi_name = "GDIX1002", .properties = gdix1001_upside_down_props, }; @@ -1412,7 +1407,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Juno Tablet */ - .driver_data = (void *)&gdix1002_00_upside_down_data, + .driver_data = (void *)&gdix1002_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Default string"), /* Both product- and board-name being "Default string" is somewhat rare */ @@ -1658,7 +1653,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X89 (Android version / BIOS) */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "WISKY"), DMI_MATCH(DMI_BOARD_NAME, "3G062i"), @@ -1666,7 +1661,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X89 (Windows version / BIOS) */ - .driver_data = (void *)&gdix1001_01_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { /* tPAD is too generic, also match on bios date */ DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"), @@ -1684,7 +1679,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* Teclast X98 Pro */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { /* * Only match BIOS date, because the manufacturers @@ -1788,7 +1783,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* "WinBook TW100" */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "WinBook"), DMI_MATCH(DMI_PRODUCT_NAME, "TW100") @@ -1796,7 +1791,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = { }, { /* WinBook TW700 */ - .driver_data = (void *)&gdix1001_00_upside_down_data, + .driver_data = (void *)&gdix1001_upside_down_data, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "WinBook"), DMI_MATCH(DMI_PRODUCT_NAME, "TW700") -- GitLab From 3da01394c0f727baaee728de290eb1ecaad099fb Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 16 Feb 2024 12:11:11 +0530 Subject: [PATCH 1229/1405] platform/x86/amd/pmf: Remove smart_pc_status enum Improve code readability by removing smart_pc_status enum, as the same can be done with a simple true/false check; Update the code checks accordingly. Also add a missing return on amd_pmf_init_smart_pc() success, to skip trying to setup the auto / slider modes which should not be used in this case. Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240216064112.962582-1-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/core.c | 11 ++++++++--- drivers/platform/x86/amd/pmf/pmf.h | 5 ----- drivers/platform/x86/amd/pmf/tee-if.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index feaa09f5b35a1..1d6dbd246d654 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -330,9 +330,14 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev) dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n"); } - if (!amd_pmf_init_smart_pc(dev)) { + amd_pmf_init_smart_pc(dev); + if (dev->smart_pc_enabled) { dev_dbg(dev->dev, "Smart PC Solution Enabled\n"); - } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { + /* If Smart PC is enabled, no need to check for other features */ + return; + } + + if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { amd_pmf_init_auto_mode(dev); dev_dbg(dev->dev, "Auto Mode Init done\n"); } else if (is_apmf_func_supported(dev, APMF_FUNC_DYN_SLIDER_AC) || @@ -351,7 +356,7 @@ static void amd_pmf_deinit_features(struct amd_pmf_dev *dev) amd_pmf_deinit_sps(dev); } - if (!dev->smart_pc_enabled) { + if (dev->smart_pc_enabled) { amd_pmf_deinit_smart_pc(dev); } else if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { amd_pmf_deinit_auto_mode(dev); diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h index 16999c5b334fd..66cae1cca73cc 100644 --- a/drivers/platform/x86/amd/pmf/pmf.h +++ b/drivers/platform/x86/amd/pmf/pmf.h @@ -441,11 +441,6 @@ struct apmf_dyn_slider_output { struct apmf_cnqf_power_set ps[APMF_CNQF_MAX]; } __packed; -enum smart_pc_status { - PMF_SMART_PC_ENABLED, - PMF_SMART_PC_DISABLED, -}; - /* Smart PC - TA internals */ enum system_state { SYSTEM_STATE_S0i3, diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index f8c0177afb0da..8b7e3f87702e1 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -260,7 +260,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) res = amd_pmf_invoke_cmd_init(dev); if (res == TA_PMF_TYPE_SUCCESS) { /* Now its safe to announce that smart pc is enabled */ - dev->smart_pc_enabled = PMF_SMART_PC_ENABLED; + dev->smart_pc_enabled = true; /* * Start collecting the data from TA FW after a small delay * or else, we might end up getting stale values. @@ -268,7 +268,7 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) schedule_delayed_work(&dev->pb_work, msecs_to_jiffies(pb_actions_ms * 3)); } else { dev_err(dev->dev, "ta invoke cmd init failed err: %x\n", res); - dev->smart_pc_enabled = PMF_SMART_PC_DISABLED; + dev->smart_pc_enabled = false; return res; } -- GitLab From 11e298f3548a6fe5e6ad78f811abfba15e6ebbc1 Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Fri, 16 Feb 2024 12:11:12 +0530 Subject: [PATCH 1230/1405] platform/x86/amd/pmf: Fix TEE enact command failure after suspend and resume TEE enact command failures are seen after each suspend/resume cycle; fix this by cancelling the policy builder workqueue before going into suspend and reschedule the workqueue after resume. [ 629.516792] ccp 0000:c2:00.2: tee: command 0x5 timed out, disabling PSP [ 629.516835] amd-pmf AMDI0102:00: TEE enact cmd failed. err: ffff000e, ret:0 [ 630.550464] amd-pmf AMDI0102:00: AMD_PMF_REGISTER_RESPONSE:1 [ 630.550511] amd-pmf AMDI0102:00: AMD_PMF_REGISTER_ARGUMENT:7 [ 630.550548] amd-pmf AMDI0102:00: AMD_PMF_REGISTER_MESSAGE:16 Fixes: ae82cef7d9c5 ("platform/x86/amd/pmf: Add support for PMF-TA interaction") Co-developed-by: Patil Rajesh Reddy Signed-off-by: Patil Rajesh Reddy Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240216064112.962582-2-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 1d6dbd246d654..853158933510e 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -296,6 +296,9 @@ static int amd_pmf_suspend_handler(struct device *dev) { struct amd_pmf_dev *pdev = dev_get_drvdata(dev); + if (pdev->smart_pc_enabled) + cancel_delayed_work_sync(&pdev->pb_work); + kfree(pdev->buf); return 0; @@ -312,6 +315,9 @@ static int amd_pmf_resume_handler(struct device *dev) return ret; } + if (pdev->smart_pc_enabled) + schedule_delayed_work(&pdev->pb_work, msecs_to_jiffies(2000)); + return 0; } -- GitLab From b2b6fa6f5c57848c95ca7bf1f0a7d1bb340c3460 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 18:52:16 -0600 Subject: [PATCH 1231/1405] platform/x86/amd/pmf: Fix a suspend hang on Framework 13 The buffer is cleared in the suspend handler but used in the delayed work for amd_pmf_get_metrics(). Stop clearing it to fix the hang. Reported-by: Trolli Schmittlauch Closes: https://lore.kernel.org/regressions/ed2226ff-257b-4cfd-afd6-bf3be9785474@localhost/ Closes: https://community.frame.work/t/kernel-6-8-rc-system-freezes-after-resuming-from-suspend-reproducers-wanted/45381 Fixes: 2b3a7f06caaf ("platform/x86/amd/pmf: Change return type of amd_pmf_set_dram_addr()") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217005216.113408-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 853158933510e..4f734e049f4a4 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -299,8 +299,6 @@ static int amd_pmf_suspend_handler(struct device *dev) if (pdev->smart_pc_enabled) cancel_delayed_work_sync(&pdev->pb_work); - kfree(pdev->buf); - return 0; } -- GitLab From 20545af302bb1f6a67c0348b64032c11ecfa77ba Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 19:41:06 -0600 Subject: [PATCH 1232/1405] platform/x86/amd/pmf: Add debugging message for missing policy data If a machine advertises Smart PC support but is missing policy data show a debugging message to help clarify why Smart PC wasn't enabled. Reviewed-by: Hans de Goede Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217014107.113749-2-mario.limonciello@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8b7e3f87702e1..1359ab340f7cb 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -252,8 +252,10 @@ static int amd_pmf_start_policy_engine(struct amd_pmf_dev *dev) cookie = readl(dev->policy_buf + POLICY_COOKIE_OFFSET); length = readl(dev->policy_buf + POLICY_COOKIE_LEN); - if (cookie != POLICY_SIGN_COOKIE || !length) + if (cookie != POLICY_SIGN_COOKIE || !length) { + dev_dbg(dev->dev, "cookie doesn't match\n"); return -EINVAL; + } /* Update the actual length */ dev->policy_sz = length + 512; -- GitLab From e7096150580849429ff3d43dd69a718bb2036be4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 19:41:07 -0600 Subject: [PATCH 1233/1405] platform/x86/amd/pmf: Fixup error handling for amd_pmf_init_smart_pc() amd_pmf_init_smart_pc() calls out to amd_pmf_get_bios_buffer() but the error handling flow doesn't clean everything up allocated memory. As amd_pmf_get_bios_buffer() is only called by amd_pmf_init_smart_pc(), fold it into the function and add labels to clean up any step that can fail along the way. Explicitly set everything allocated to NULL as there are other features that may access some of the same variables. Fixes: 7c45534afa44 ("platform/x86/amd/pmf: Add support for PMF Policy Binary") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217014107.113749-3-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 65 ++++++++++++++++----------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 1359ab340f7cb..4f74de680654a 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -338,25 +338,6 @@ static void amd_pmf_remove_pb(struct amd_pmf_dev *dev) {} static void amd_pmf_hex_dump_pb(struct amd_pmf_dev *dev) {} #endif -static int amd_pmf_get_bios_buffer(struct amd_pmf_dev *dev) -{ - dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); - if (!dev->policy_buf) - return -ENOMEM; - - dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz); - if (!dev->policy_base) - return -ENOMEM; - - memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz); - - amd_pmf_hex_dump_pb(dev); - if (pb_side_load) - amd_pmf_open_pb(dev, dev->dbgfs_dir); - - return amd_pmf_start_policy_engine(dev); -} - static int amd_pmf_amdtee_ta_match(struct tee_ioctl_version_data *ver, const void *data) { return ver->impl_id == TEE_IMPL_ID_AMDTEE; @@ -455,22 +436,56 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) return ret; INIT_DELAYED_WORK(&dev->pb_work, amd_pmf_invoke_cmd); - amd_pmf_set_dram_addr(dev, true); - amd_pmf_get_bios_buffer(dev); + + ret = amd_pmf_set_dram_addr(dev, true); + if (ret) + goto error; + + dev->policy_base = devm_ioremap(dev->dev, dev->policy_addr, dev->policy_sz); + if (!dev->policy_base) { + ret = -ENOMEM; + goto error; + } + + dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); + if (!dev->policy_buf) { + ret = -ENOMEM; + goto error; + } + + memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz); + + amd_pmf_hex_dump_pb(dev); + if (pb_side_load) + amd_pmf_open_pb(dev, dev->dbgfs_dir); + dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); if (!dev->prev_data) - return -ENOMEM; + goto error; - return dev->smart_pc_enabled; + ret = amd_pmf_start_policy_engine(dev); + if (ret) + goto error; + + return 0; + +error: + amd_pmf_deinit_smart_pc(dev); + + return ret; } void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev) { - if (pb_side_load) + if (pb_side_load && dev->esbin) amd_pmf_remove_pb(dev); + cancel_delayed_work_sync(&dev->pb_work); kfree(dev->prev_data); + dev->prev_data = NULL; kfree(dev->policy_buf); - cancel_delayed_work_sync(&dev->pb_work); + dev->policy_buf = NULL; + kfree(dev->buf); + dev->buf = NULL; amd_pmf_tee_deinit(dev); } -- GitLab From 76d41fb063338781e936765b7ed74224215ca178 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 19:56:42 -0600 Subject: [PATCH 1234/1405] platform/x86/amd/pmf: Fix a potential race with policy binary sideload The debugfs `update_policy` file is created before amd_pmf_start_policy_engine() has completed, and thus there could be a possible (albeit unlikely) race between sideloading a policy and the BIOS policy getting setup. Move the debugfs file creation after all BIOS policy is setup. Fixes: 10817f28e533 ("platform/x86/amd/pmf: Add capability to sideload of policy binary") Reported-by: Hans de Goede Closes: https://lore.kernel.org/platform-driver-x86/15df7d02-b0aa-457a-954a-9d280a592843@redhat.com/T/#m2c445f135e5ef9b53184be7fc9df84e15f89d4d9 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217015642.113806-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 4f74de680654a..8527dca9cf560 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -456,8 +456,6 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) memcpy(dev->policy_buf, dev->policy_base, dev->policy_sz); amd_pmf_hex_dump_pb(dev); - if (pb_side_load) - amd_pmf_open_pb(dev, dev->dbgfs_dir); dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); if (!dev->prev_data) @@ -467,6 +465,9 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) if (ret) goto error; + if (pb_side_load) + amd_pmf_open_pb(dev, dev->dbgfs_dir); + return 0; error: -- GitLab From 9c92006b896c767218aabe8947b62026a571cfd0 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Wed, 31 Jan 2024 09:19:33 +0100 Subject: [PATCH 1235/1405] irqchip/sifive-plic: Enable interrupt if needed before EOI RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as explained in the description of Interrupt Completion in the PLIC spec: "The PLIC signals it has completed executing an interrupt handler by writing the interrupt ID it received from the claim to the claim/complete register. The PLIC does not check whether the completion ID is the same as the last claim ID for that target. If the completion ID does not match an interrupt source that *is currently enabled* for the target, the completion is silently ignored." Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked") ensured that EOI is successful by enabling interrupt first, before EOI. Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") removed the interrupt enabling code from the previous commit, because it assumes that interrupt should already be enabled at the point of EOI. However, this is incorrect: there is a window after a hart claiming an interrupt and before irq_desc->lock getting acquired, interrupt can be disabled during this window. Thus, EOI can be invoked while the interrupt is disabled, effectively nullify this EOI. This results in the interrupt never gets asserted again, and the device who uses this interrupt appears frozen. Make sure that interrupt is really enabled before EOI. Fixes: a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask operations") Signed-off-by: Nam Cao Signed-off-by: Thomas Gleixner Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Samuel Holland Cc: Marc Zyngier Cc: Guo Ren Cc: linux-riscv@lists.infradead.org Cc: Link: https://lore.kernel.org/r/20240131081933.144512-1-namcao@linutronix.de --- drivers/irqchip/irq-sifive-plic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 5b7bc4fd9517c..bf0b40b0fad4b 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -148,7 +148,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (unlikely(irqd_irq_disabled(d))) { + plic_toggle(handler, d->hwirq, 1); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_toggle(handler, d->hwirq, 0); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } #ifdef CONFIG_SMP -- GitLab From db744ddd59be798c2627efbfc71f707f5a935a40 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Mon, 15 Jan 2024 19:26:49 +0530 Subject: [PATCH 1236/1405] PCI/MSI: Prevent MSI hardware interrupt number truncation While calculating the hardware interrupt number for a MSI interrupt, the higher bits (i.e. from bit-5 onwards a.k.a domain_nr >= 32) of the PCI domain number gets truncated because of the shifted value casting to return type of pci_domain_nr() which is 'int'. This for example is resulting in same hardware interrupt number for devices 0019:00:00.0 and 0039:00:00.0. To address this cast the PCI domain number to 'irq_hw_number_t' before left shifting it to calculate the hardware interrupt number. Please note that this fixes the issue only on 64-bit systems and doesn't change the behavior for 32-bit systems i.e. the 32-bit systems continue to have the issue. Since the issue surfaces only if there are too many PCIe controllers in the system which usually is the case in modern server systems and they don't tend to run 32-bit kernels. Fixes: 3878eaefb89a ("PCI/MSI: Enhance core to support hierarchy irqdomain") Signed-off-by: Vidya Sagar Signed-off-by: Thomas Gleixner Tested-by: Shanker Donthineni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240115135649.708536-1-vidyas@nvidia.com --- drivers/pci/msi/irqdomain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index c8be056c248de..cfd84a899c82d 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -61,7 +61,7 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) return (irq_hw_number_t)desc->msi_index | pci_dev_id(dev) << 11 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; + ((irq_hw_number_t)(pci_domain_nr(dev->bus) & 0xFFFFFFFF)) << 27; } static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, -- GitLab From 882a2a724ee964c1ebe7268a91d5c8c8ddc796bf Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 15 Feb 2024 13:51:45 -0800 Subject: [PATCH 1237/1405] parisc: Fix stack unwinder Debugging shows a large number of unaligned access traps in the unwinder code. Code analysis reveals a number of issues with this code: - handle_interruption is passed twice through dereference_kernel_function_descriptor() - ret_from_kernel_thread, syscall_exit, intr_return, _switch_to_ret, and _call_on_stack are passed through dereference_kernel_function_descriptor() even though they are not declared as function pointers. To fix the problems, drop one of the calls to dereference_kernel_function_descriptor() for handle_interruption, and compare the other pointers directly. Fixes: 6414b30b39f9 ("parisc: unwind: Avoid missing prototype warning for handle_interruption()") Fixes: 8e0ba125c2bf ("parisc/unwind: fix unwinder when CONFIG_64BIT is enabled") Cc: Helge Deller Cc: Sven Schnelle Cc: John David Anglin Cc: Charlie Jenkins Cc: David Laight Signed-off-by: Guenter Roeck Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 27ae40a443b80..f7e0fee5ee55a 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -228,10 +228,8 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int #ifdef CONFIG_IRQSTACKS extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - void *ptr; - ptr = dereference_kernel_function_descriptor(&handle_interruption); - if (pc_is_kernel_fn(pc, ptr)) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -239,13 +237,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || - pc_is_kernel_fn(pc, syscall_exit)) { + if (pc == (unsigned long)&ret_from_kernel_thread || + pc == (unsigned long)&syscall_exit) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc_is_kernel_fn(pc, intr_return)) { + if (pc == (unsigned long)&intr_return) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -257,14 +255,14 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int } if (pc_is_kernel_fn(pc, _switch_to) || - pc_is_kernel_fn(pc, _switch_to_ret)) { + pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc_is_kernel_fn(pc, _call_on_stack)) { + if (pc == (unsigned long)&_call_on_stack) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; -- GitLab From baf8361e54550a48a7087b603313ad013cc13386 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:21:35 -0800 Subject: [PATCH 1238/1405] x86/bugs: Add asm helpers for executing VERW MDS mitigation requires clearing the CPU buffers before returning to user. This needs to be done late in the exit-to-user path. Current location of VERW leaves a possibility of kernel data ending up in CPU buffers for memory accesses done after VERW such as: 1. Kernel data accessed by an NMI between VERW and return-to-user can remain in CPU buffers since NMI returning to kernel does not execute VERW to clear CPU buffers. 2. Alyssa reported that after VERW is executed, CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system call. Memory accesses during stack scrubbing can move kernel stack contents into CPU buffers. 3. When caller saved registers are restored after a return from function executing VERW, the kernel stack accesses can remain in CPU buffers(since they occur after VERW). To fix this VERW needs to be moved very late in exit-to-user path. In preparation for moving VERW to entry/exit asm code, create macros that can be used in asm. Also make VERW patching depend on a new feature flag X86_FEATURE_CLEAR_CPU_BUF. Reported-by: Alyssa Milburn Suggested-by: Andrew Cooper Suggested-by: Peter Zijlstra Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com --- arch/x86/entry/entry.S | 23 +++++++++++++++++++++++ arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S index 8c8d38f0cb1df..0033790499245 100644 --- a/arch/x86/entry/entry.S +++ b/arch/x86/entry/entry.S @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include .pushsection .noinstr.text, "ax" @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb) EXPORT_SYMBOL_GPL(entry_ibpb); .popsection + +/* + * Define the VERW operand that is disguised as entry code so that + * it can be referenced with KPTI enabled. This ensure VERW can be + * used late in exit-to-user path after page tables are switched. + */ +.pushsection .entry.text, "ax" + +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_START_NOALIGN(mds_verw_sel) + UNWIND_HINT_UNDEFINED + ANNOTATE_NOENDBR + .word __KERNEL_DS +.align L1_CACHE_BYTES, 0xcc +SYM_CODE_END(mds_verw_sel); +/* For KVM */ +EXPORT_SYMBOL_GPL(mds_verw_sel); + +.popsection + diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index fdf723b6f6d0c..2b62cdd8dd122 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -95,7 +95,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 262e65539f83c..077083ec81cbe 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -315,6 +315,17 @@ #endif .endm +/* + * Macro to execute VERW instruction that mitigate transient data sampling + * attacks such as MDS. On affected systems a microcode update overloaded VERW + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF. + * + * Note: Only the memory operand variant of VERW clears the CPU buffers. + */ +.macro CLEAR_CPU_BUFFERS + ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF +.endm + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -536,6 +547,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); +extern u16 mds_verw_sel; + #include /** -- GitLab From 3c7501722e6b31a6e56edd23cea5e77dbb9ffd1a Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:21:52 -0800 Subject: [PATCH 1239/1405] x86/entry_64: Add VERW just before userspace transition Mitigation for MDS is to use VERW instruction to clear any secrets in CPU Buffers. Any memory accesses after VERW execution can still remain in CPU buffers. It is safer to execute VERW late in return to user path to minimize the window in which kernel data can end up in CPU buffers. There are not many kernel secrets to be had after SWITCH_TO_USER_CR3. Add support for deploying VERW mitigation after user register state is restored. This helps minimize the chances of kernel data ending up into CPU buffers after executing VERW. Note that the mitigation at the new location is not yet enabled. Corner case not handled ======================= Interrupts returning to kernel don't clear CPUs buffers since the exit-to-user path is expected to do that anyways. But, there could be a case when an NMI is generated in kernel after the exit-to-user path has cleared the buffers. This case is not handled and NMI returning to kernel don't clear CPU buffers because: 1. It is rare to get an NMI after VERW, but before returning to userspace. 2. For an unprivileged user, there is no known way to make that NMI less rare or target it. 3. It would take a large number of these precisely-timed NMIs to mount an actual attack. There's presumably not enough bandwidth. 4. The NMI in question occurs after a VERW, i.e. when user state is restored and most interesting data is already scrubbed. Whats left is only the data that NMI touches, and that may or may not be of any interest. Suggested-by: Dave Hansen Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-2-a6216d83edb7%40linux.intel.com --- arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/entry/entry_64_compat.S | 1 + 2 files changed, 12 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index c40f89ab1b4c7..9bb4859776291 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -161,6 +161,7 @@ syscall_return_via_sysret: SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR swapgs + CLEAR_CPU_BUFFERS sysretq SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) .Lswapgs_and_iret: swapgs + CLEAR_CPU_BUFFERS /* Assert that the IRET frame indicates user mode. */ testb $3, 8(%rsp) jnz .Lnative_iret @@ -723,6 +725,8 @@ native_irq_return_ldt: */ popq %rax /* Restore user RAX */ + CLEAR_CPU_BUFFERS + /* * RSP now points to an ordinary IRET frame, except that the page * is read-only and RSP[31:16] are preloaded with the userspace @@ -1449,6 +1453,12 @@ nmi_restore: std movq $0, 5*8(%rsp) /* clear "NMI executing" */ + /* + * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like + * NMI in kernel after user state is restored. For an unprivileged user + * these conditions are hard to meet. + */ + /* * iretq reads the "iret" frame and exits the NMI stack in a * single instruction. We are returning to kernel mode, so this @@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore) UNWIND_HINT_END_OF_STACK ENDBR mov $-ENOSYS, %eax + CLEAR_CPU_BUFFERS sysretl SYM_CODE_END(entry_SYSCALL32_ignore) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index de94e2e84ecca..eabf48c4d4b4c 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) xorl %r9d, %r9d xorl %r10d, %r10d swapgs + CLEAR_CPU_BUFFERS sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR -- GitLab From a0e2dab44d22b913b4c228c8b52b2a104434b0b3 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:22:08 -0800 Subject: [PATCH 1240/1405] x86/entry_32: Add VERW just before userspace transition As done for entry_64, add support for executing VERW late in exit to user path for 32-bit mode. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-3-a6216d83edb7%40linux.intel.com --- arch/x86/entry/entry_32.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index c73047bf9f4bf..fba427646805d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32) BUG_IF_WRONG_CR3 no_user_check=1 popfl popl %eax + CLEAR_CPU_BUFFERS /* * Return back to the vDSO, which will pop ecx and edx. @@ -954,6 +955,7 @@ restore_all_switch_stack: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code + CLEAR_CPU_BUFFERS .Lirq_return: /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization @@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi) /* Not on SYSENTER stack. */ call exc_nmi + CLEAR_CPU_BUFFERS jmp .Lnmi_return .Lnmi_from_sysenter_stack: -- GitLab From 6613d82e617dd7eb8b0c40b2fe3acea655b1d611 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:22:24 -0800 Subject: [PATCH 1241/1405] x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key The VERW mitigation at exit-to-user is enabled via a static branch mds_user_clear. This static branch is never toggled after boot, and can be safely replaced with an ALTERNATIVE() which is convenient to use in asm. Switch to ALTERNATIVE() to use the VERW mitigation late in exit-to-user path. Also remove the now redundant VERW in exc_nmi() and arch_exit_to_user_mode(). Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240213-delay-verw-v8-4-a6216d83edb7%40linux.intel.com --- Documentation/arch/x86/mds.rst | 38 ++++++++++++++++++++-------- arch/x86/include/asm/entry-common.h | 1 - arch/x86/include/asm/nospec-branch.h | 12 --------- arch/x86/kernel/cpu/bugs.c | 15 +++++------ arch/x86/kernel/nmi.c | 3 --- arch/x86/kvm/vmx/vmx.c | 2 +- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/Documentation/arch/x86/mds.rst b/Documentation/arch/x86/mds.rst index e73fdff62c0aa..c58c72362911c 100644 --- a/Documentation/arch/x86/mds.rst +++ b/Documentation/arch/x86/mds.rst @@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing: mds_clear_cpu_buffers() +Also macro CLEAR_CPU_BUFFERS can be used in ASM late in exit-to-user path. +Other than CFLAGS.ZF, this macro doesn't clobber any registers. + The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state (idle) transitions. @@ -138,17 +141,30 @@ Mitigation points When transitioning from kernel to user space the CPU buffers are flushed on affected CPUs when the mitigation is not disabled on the kernel - command line. The migitation is enabled through the static key - mds_user_clear. - - The mitigation is invoked in prepare_exit_to_usermode() which covers - all but one of the kernel to user space transitions. The exception - is when we return from a Non Maskable Interrupt (NMI), which is - handled directly in do_nmi(). - - (The reason that NMI is special is that prepare_exit_to_usermode() can - enable IRQs. In NMI context, NMIs are blocked, and we don't want to - enable IRQs with NMIs blocked.) + command line. The mitigation is enabled through the feature flag + X86_FEATURE_CLEAR_CPU_BUF. + + The mitigation is invoked just before transitioning to userspace after + user registers are restored. This is done to minimize the window in + which kernel data could be accessed after VERW e.g. via an NMI after + VERW. + + **Corner case not handled** + Interrupts returning to kernel don't clear CPUs buffers since the + exit-to-user path is expected to do that anyways. But, there could be + a case when an NMI is generated in kernel after the exit-to-user path + has cleared the buffers. This case is not handled and NMI returning to + kernel don't clear CPU buffers because: + + 1. It is rare to get an NMI after VERW, but before returning to userspace. + 2. For an unprivileged user, there is no known way to make that NMI + less rare or target it. + 3. It would take a large number of these precisely-timed NMIs to mount + an actual attack. There's presumably not enough bandwidth. + 4. The NMI in question occurs after a VERW, i.e. when user state is + restored and most interesting data is already scrubbed. Whats left + is only the data that NMI touches, and that may or may not be of + any interest. 2. C-State transition diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index ce8f50192ae3e..7e523bb3d2d31 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - mds_user_clear_cpu_buffers(); amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 077083ec81cbe..2aa52cab1e463 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -540,7 +540,6 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); @@ -574,17 +573,6 @@ static __always_inline void mds_clear_cpu_buffers(void) asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); } -/** - * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability - * - * Clear CPU buffers if the corresponding static key is enabled - */ -static __always_inline void mds_user_clear_cpu_buffers(void) -{ - if (static_branch_likely(&mds_user_clear)) - mds_clear_cpu_buffers(); -} - /** * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability * diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bb0ab8466b919..48d049cd74e71 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); -/* Control MDS CPU buffer clear before returning to user space */ -DEFINE_STATIC_KEY_FALSE(mds_user_clear); -EXPORT_SYMBOL_GPL(mds_user_clear); /* Control MDS CPU buffer clear before idling (halt, mwait) */ DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); @@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void) if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) mds_mitigation = MDS_MITIGATION_VMWERV; - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && (mds_nosmt || cpu_mitigations_auto_nosmt())) @@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void) * For guests that can't determine whether the correct microcode is * present on host, enable the mitigation for UCODE_NEEDED as well. */ - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); @@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void) */ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM))) - static_branch_enable(&mds_user_clear); + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else static_branch_enable(&mmio_stale_data_clear); @@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void) if (cpu_mitigations_off()) return; - if (!static_key_enabled(&mds_user_clear)) + if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF)) goto out; /* - * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data - * mitigation, if necessary. + * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO + * Stale Data mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 17e955ab69fed..3082cf24b69e3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -563,9 +563,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi) } if (this_cpu_dec_return(nmi_state)) goto nmi_restart; - - if (user_mode(regs)) - mds_user_clear_cpu_buffers(); } #if IS_ENABLED(CONFIG_KVM_INTEL) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1111d9d089038..db8a5fe7edf67 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7227,7 +7227,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (static_branch_unlikely(&mds_user_clear)) + else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) -- GitLab From 706a189dcf74d3b3f955e9384785e726ed6c7c80 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Feb 2024 18:22:40 -0800 Subject: [PATCH 1242/1405] KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH Use EFLAGS.CF instead of EFLAGS.ZF to track whether to use VMRESUME versus VMLAUNCH. Freeing up EFLAGS.ZF will allow doing VERW, which clobbers ZF, for MDS mitigations as late as possible without needing to duplicate VERW for both paths. Signed-off-by: Sean Christopherson Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/all/20240213-delay-verw-v8-5-a6216d83edb7%40linux.intel.com --- arch/x86/kvm/vmx/run_flags.h | 7 +++++-- arch/x86/kvm/vmx/vmenter.S | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h index edc3f16cc1896..6a9bfdfbb6e59 100644 --- a/arch/x86/kvm/vmx/run_flags.h +++ b/arch/x86/kvm/vmx/run_flags.h @@ -2,7 +2,10 @@ #ifndef __KVM_X86_VMX_RUN_FLAGS_H #define __KVM_X86_VMX_RUN_FLAGS_H -#define VMX_RUN_VMRESUME (1 << 0) -#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) +#define VMX_RUN_VMRESUME_SHIFT 0 +#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1 + +#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT) +#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT) #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 906ecd0015113..ef7cfbad4d573 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run) mov (%_ASM_SP), %_ASM_AX /* Check if vmlaunch or vmresume is needed */ - test $VMX_RUN_VMRESUME, %ebx + bt $VMX_RUN_VMRESUME_SHIFT, %ebx /* Load guest registers. Don't clobber flags. */ mov VCPU_RCX(%_ASM_AX), %_ASM_CX @@ -161,8 +161,8 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX - /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */ - jz .Lvmlaunch + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ + jnc .Lvmlaunch /* * After a successful VMRESUME/VMLAUNCH, control flow "magically" -- GitLab From 43fb862de8f628c5db5e96831c915b9aebf62d33 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 13 Feb 2024 18:22:56 -0800 Subject: [PATCH 1243/1405] KVM/VMX: Move VERW closer to VMentry for MDS mitigation During VMentry VERW is executed to mitigate MDS. After VERW, any memory access like register push onto stack may put host data in MDS affected CPU buffers. A guest can then use MDS to sample host data. Although likelihood of secrets surviving in registers at current VERW callsite is less, but it can't be ruled out. Harden the MDS mitigation by moving the VERW mitigation late in VMentry path. Note that VERW for MMIO Stale Data mitigation is unchanged because of the complexity of per-guest conditional VERW which is not easy to handle that late in asm with no GPRs available. If the CPU is also affected by MDS, VERW is unconditionally executed late in asm regardless of guest having MMIO access. Signed-off-by: Pawan Gupta Signed-off-by: Dave Hansen Acked-by: Sean Christopherson Link: https://lore.kernel.org/all/20240213-delay-verw-v8-6-a6216d83edb7%40linux.intel.com --- arch/x86/kvm/vmx/vmenter.S | 3 +++ arch/x86/kvm/vmx/vmx.c | 20 ++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index ef7cfbad4d573..2bfbf758d0611 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -161,6 +161,9 @@ SYM_FUNC_START(__vmx_vcpu_run) /* Load guest RAX. This kills the @regs pointer! */ mov VCPU_RAX(%_ASM_AX), %_ASM_AX + /* Clobbers EFLAGS.ZF */ + CLEAR_CPU_BUFFERS + /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */ jnc .Lvmlaunch diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index db8a5fe7edf67..88a4ff200d04b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && + /* + * Disable VERW's behavior of clearing CPU buffers for the guest if the + * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled + * the mitigation. Disabling the clearing behavior provides a + * performance boost for guests that aren't aware that manually clearing + * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry + * and VM-Exit. + */ + vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) && + (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) && !boot_cpu_has_bug(X86_BUG_MDS) && !boot_cpu_has_bug(X86_BUG_TAA); @@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, guest_state_enter_irqoff(); - /* L1D Flush includes CPU buffer clear to mitigate MDS */ + /* + * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW + * mitigation for MDS is done late in VMentry and is still + * executed in spite of L1D Flush. This is because an extra VERW + * should not matter much after the big hammer L1D Flush. + */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); - else if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF)) - mds_clear_cpu_buffers(); else if (static_branch_unlikely(&mmio_stale_data_clear) && kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); -- GitLab From 2c88c16dc20e88dd54d2f6f4d01ae1dce6cc9654 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Feb 2024 22:44:11 -0500 Subject: [PATCH 1244/1405] erofs: fix handling kern_mount() failure if you have a variable that holds NULL or a pointer to live struct mount, do not shove ERR_PTR() into it - not if you later treat "not NULL" as "holds a pointer to object". Signed-off-by: Al Viro --- fs/erofs/fscache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index bc12030393b24..29ad5b1cc7fde 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -381,11 +381,12 @@ static int erofs_fscache_init_domain(struct super_block *sb) goto out; if (!erofs_pseudo_mnt) { - erofs_pseudo_mnt = kern_mount(&erofs_fs_type); - if (IS_ERR(erofs_pseudo_mnt)) { - err = PTR_ERR(erofs_pseudo_mnt); + struct vfsmount *mnt = kern_mount(&erofs_fs_type); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); goto out; } + erofs_pseudo_mnt = mnt; } domain->volume = sbi->volume; -- GitLab From 69f89168b310878be82d7d97bc0d22068ad858c0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 12 Feb 2024 18:42:13 +0000 Subject: [PATCH 1245/1405] usb: typec: tpcm: Fix issues with power being removed during reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the merge of b717dfbf73e8 ("Revert "usb: typec: tcpm: fix cc role at port reset"") into mainline the LibreTech Renegade Elite/Firefly has died during boot, the main symptom observed in testing is a sudden stop in console output. Gábor Stefanik identified in review that the patch would cause power to be removed from devices without batteries (like this board), observing that while the patch is correct according to the spec this appears to be an oversight in the spec. Given that the change makes previously working systems unusable let's revert it, there was some discussion of identifying systems that have alternative power and implementing the standards conforming behaviour in only that case. Fixes: b717dfbf73e8 ("Revert "usb: typec: tcpm: fix cc role at port reset"") Cc: stable Cc: Badhri Jagan Sridharan Signed-off-by: Mark Brown Acked-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240212-usb-fix-renegade-v1-1-22c43c88d635@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 295ae7eb912c8..66e532edcece6 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4873,7 +4873,8 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - tcpm_set_cc(port, TYPEC_CC_OPEN); + tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ? + TYPEC_CC_RD : tcpm_rp_cc(port)); tcpm_set_state(port, PORT_RESET_WAIT_OFF, PD_T_ERROR_RECOVERY); break; -- GitLab From e21a2f17566cbd64926fb8f16323972f7a064444 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Sat, 17 Feb 2024 16:14:31 +0800 Subject: [PATCH 1246/1405] cachefiles: fix memory leak in cachefiles_add_cache() The following memory leak was reported after unbinding /dev/cachefiles: ================================================================== unreferenced object 0xffff9b674176e3c0 (size 192): comm "cachefilesd2", pid 680, jiffies 4294881224 hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc ea38a44b): [] kmem_cache_alloc+0x2d5/0x370 [] prepare_creds+0x26/0x2e0 [] cachefiles_determine_cache_security+0x1f/0x120 [] cachefiles_add_cache+0x13c/0x3a0 [] cachefiles_daemon_write+0x146/0x1c0 [] vfs_write+0xcb/0x520 [] ksys_write+0x69/0xf0 [] do_syscall_64+0x72/0x140 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ================================================================== Put the reference count of cache_cred in cachefiles_daemon_unbind() to fix the problem. And also put cache_cred in cachefiles_add_cache() error branch to avoid memory leaks. Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") CC: stable@vger.kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240217081431.796809-1-libaokun1@huawei.com Acked-by: David Howells Reviewed-by: Jingbo Xu Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/cachefiles/cache.c | 2 ++ fs/cachefiles/daemon.c | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index 7077f72e6f474..f449f7340aad0 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -168,6 +168,8 @@ int cachefiles_add_cache(struct cachefiles_cache *cache) dput(root); error_open_root: cachefiles_end_secure(cache, saved_cred); + put_cred(cache->cache_cred); + cache->cache_cred = NULL; error_getsec: fscache_relinquish_cache(cache_cookie); cache->cache = NULL; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 3f24905f40661..6465e25742309 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -816,6 +816,7 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); + put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); -- GitLab From bfacaf71a1482d936804213a3ffa6de73558280e Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Mon, 19 Feb 2024 14:39:02 +0000 Subject: [PATCH 1247/1405] afs: Fix ignored callbacks over ipv4 When searching for a matching peer, all addresses need to be searched, not just the ipv6 ones in the fs_addresses6 list. Given that the lists no longer contain addresses, there is little reason to splitting things between separate lists, so unify them into a single list. When processing an incoming callback from an ipv4 address, this would lead to a failure to set call->server, resulting in the callback being ignored and the client seeing stale contents. Fixes: 72904d7b9bfb ("rxrpc, afs: Allow afs to pin rxrpc_peer objects") Reported-by: Markus Suvanto Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008035.html Signed-off-by: Marc Dionne Signed-off-by: David Howells Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008037.html # v1 Link: https://lists.infradead.org/pipermail/linux-afs/2024-February/008066.html # v2 Link: https://lore.kernel.org/r/20240219143906.138346-2-dhowells@redhat.com Signed-off-by: Christian Brauner --- fs/afs/internal.h | 6 ++---- fs/afs/main.c | 3 +-- fs/afs/server.c | 14 +++++--------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 9c03fcf7ffaa8..6ce5a612937c6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -321,8 +321,7 @@ struct afs_net { struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ struct hlist_head fs_proc; /* procfs servers list */ - struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ - struct hlist_head fs_addresses6; /* afs_server (by lowest IPv6 addr) */ + struct hlist_head fs_addresses; /* afs_server (by lowest IPv6 addr) */ seqlock_t fs_addr_lock; /* For fs_addresses[46] */ struct work_struct fs_manager; @@ -561,8 +560,7 @@ struct afs_server { struct afs_server __rcu *uuid_next; /* Next server with same UUID */ struct afs_server *uuid_prev; /* Previous server with same UUID */ struct list_head probe_link; /* Link in net->fs_probe_list */ - struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ - struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ + struct hlist_node addr_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ struct list_head volumes; /* RCU list of afs_server_entry objects */ struct afs_server *gc_next; /* Next server in manager's list */ diff --git a/fs/afs/main.c b/fs/afs/main.c index 1b3bd21c168ac..a14f6013e316d 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -90,8 +90,7 @@ static int __net_init afs_net_init(struct net *net_ns) INIT_LIST_HEAD(&net->fs_probe_slow); INIT_HLIST_HEAD(&net->fs_proc); - INIT_HLIST_HEAD(&net->fs_addresses4); - INIT_HLIST_HEAD(&net->fs_addresses6); + INIT_HLIST_HEAD(&net->fs_addresses); seqlock_init(&net->fs_addr_lock); INIT_WORK(&net->fs_manager, afs_manage_servers); diff --git a/fs/afs/server.c b/fs/afs/server.c index e169121f603e2..038f9d0ae3af8 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -38,7 +38,7 @@ struct afs_server *afs_find_server(struct afs_net *net, const struct rxrpc_peer seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&net->fs_addr_lock, &seq); - hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) { + hlist_for_each_entry_rcu(server, &net->fs_addresses, addr_link) { estate = rcu_dereference(server->endpoint_state); alist = estate->addresses; for (i = 0; i < alist->nr_addrs; i++) @@ -177,10 +177,8 @@ static struct afs_server *afs_install_server(struct afs_cell *cell, * bit, but anything we might want to do gets messy and memory * intensive. */ - if (alist->nr_ipv4 > 0) - hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4); - if (alist->nr_addrs > alist->nr_ipv4) - hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6); + if (alist->nr_addrs > 0) + hlist_add_head_rcu(&server->addr_link, &net->fs_addresses); write_sequnlock(&net->fs_addr_lock); @@ -511,10 +509,8 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) list_del(&server->probe_link); hlist_del_rcu(&server->proc_link); - if (!hlist_unhashed(&server->addr4_link)) - hlist_del_rcu(&server->addr4_link); - if (!hlist_unhashed(&server->addr6_link)) - hlist_del_rcu(&server->addr6_link); + if (!hlist_unhashed(&server->addr_link)) + hlist_del_rcu(&server->addr_link); } write_sequnlock(&net->fs_lock); -- GitLab From 6ea38e2aeb72349cad50e38899b0ba6fbcb2af3d Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Mon, 19 Feb 2024 14:39:03 +0000 Subject: [PATCH 1248/1405] afs: Increase buffer size in afs_update_volume_status() The max length of volume->vid value is 20 characters. So increase idbuf[] size up to 24 to avoid overflow. Found by Linux Verification Center (linuxtesting.org) with SVACE. [DH: Actually, it's 20 + NUL, so increase it to 24 and use snprintf()] Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Daniil Dulov Signed-off-by: David Howells Link: https://lore.kernel.org/r/20240211150442.3416-1-d.dulov@aladdin.ru/ # v1 Link: https://lore.kernel.org/r/20240212083347.10742-1-d.dulov@aladdin.ru/ # v2 Link: https://lore.kernel.org/r/20240219143906.138346-3-dhowells@redhat.com Signed-off-by: Christian Brauner --- fs/afs/volume.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 020ecd45e4762..af3a3f57c1b3f 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -353,7 +353,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) { struct afs_server_list *new, *old, *discard; struct afs_vldb_entry *vldb; - char idbuf[16]; + char idbuf[24]; int ret, idsz; _enter(""); @@ -361,7 +361,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) /* We look up an ID by passing it as a decimal string in the * operation's name parameter. */ - idsz = sprintf(idbuf, "%llu", volume->vid); + idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); if (IS_ERR(vldb)) { -- GitLab From 5559cea2d5aa3018a5f00dd2aca3427ba09b386b Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 1249/1405] ipv6: sr: fix possible use-after-free and null-ptr-deref The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9ff..35508abd76f43 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ int __init seg6_init(void) #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } -- GitLab From def689fc26b9a9622d2e2cb0c4933dd3b1c8071c Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:34:00 +0300 Subject: [PATCH 1250/1405] devlink: fix possible use-after-free and memory leaks in devlink_init() The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Make an unregister in case of unsuccessful registration. Fixes: 687125b5799c ("devlink: split out core code") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215203400.29976-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni --- net/devlink/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 6a58342752b46..7f0b093208d75 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -529,14 +529,20 @@ static int __init devlink_init(void) { int err; - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; + err = genl_register_family(&devlink_nl_family); + if (err) + goto out_unreg_pernet_subsys; err = register_netdevice_notifier(&devlink_port_netdevice_nb); + if (!err) + return 0; + + genl_unregister_family(&devlink_nl_family); +out_unreg_pernet_subsys: + unregister_pernet_subsys(&devlink_pernet_ops); out: WARN_ON(err); return err; -- GitLab From a7d6027790acea24446ddd6632d394096c0f4667 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 1251/1405] arp: Prevent overflow in arp_req_get(). syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 Reported-by: syzkaller Reported-by: Bjoern Doebel Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5d..0d0d725b46ad0 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; -- GitLab From 23f9c2c066e7e5052406fb8f04a115d3d0260b22 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 16 Feb 2024 08:19:45 -0800 Subject: [PATCH 1252/1405] docs: netdev: update the link to the CI repo Netronome graciously transferred the original NIPA repo to our new netdev umbrella org. Link to that instead of my private fork. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240216161945.2208842-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- Documentation/process/maintainer-netdev.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 84ee60fceef24..fd96e4a3cef9c 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -431,7 +431,7 @@ patchwork checks Checks in patchwork are mostly simple wrappers around existing kernel scripts, the sources are available at: -https://github.com/kuba-moo/nipa/tree/master/tests +https://github.com/linux-netdev/nipa/tree/master/tests **Do not** post your patches just to run them through the checks. You must ensure that your patches are ready by testing them locally -- GitLab From 20c8c4dafe93e82441583e93bd68c0d256d7bed4 Mon Sep 17 00:00:00 2001 From: Amit Machhiwal Date: Wed, 7 Feb 2024 11:15:26 +0530 Subject: [PATCH 1253/1405] KVM: PPC: Book3S HV: Fix L2 guest reboot failure due to empty 'arch_compat' Currently, rebooting a pseries nested qemu-kvm guest (L2) results in below error as L1 qemu sends PVR value 'arch_compat' == 0 via ppc_set_compat ioctl. This triggers a condition failure in kvmppc_set_arch_compat() resulting in an EINVAL. qemu-system-ppc64: Unable to set CPU compatibility mode in KVM: Invalid argument Also, a value of 0 for arch_compat generally refers the default compatibility of the host. But, arch_compat, being a Guest Wide Element in nested API v2, cannot be set to 0 in GSB as PowerVM (L0) expects a non-zero value. A value of 0 triggers a kernel trap during a reboot and consequently causes it to fail: [ 22.106360] reboot: Restarting system KVM: unknown exit, hardware reason ffffffffffffffea NIP 0000000000000100 LR 000000000000fe44 CTR 0000000000000000 XER 0000000020040092 CPU#0 MSR 0000000000001000 HID0 0000000000000000 HF 6c000000 iidx 3 didx 3 TB 00000000 00000000 DECR 0 GPR00 0000000000000000 0000000000000000 c000000002a8c300 000000007fe00000 GPR04 0000000000000000 0000000000000000 0000000000001002 8000000002803033 GPR08 000000000a000000 0000000000000000 0000000000000004 000000002fff0000 GPR12 0000000000000000 c000000002e10000 0000000105639200 0000000000000004 GPR16 0000000000000000 000000010563a090 0000000000000000 0000000000000000 GPR20 0000000105639e20 00000001056399c8 00007fffe54abab0 0000000105639288 GPR24 0000000000000000 0000000000000001 0000000000000001 0000000000000000 GPR28 0000000000000000 0000000000000000 c000000002b30840 0000000000000000 CR 00000000 [ - - - - - - - - ] RES 000@ffffffffffffffff SRR0 0000000000000000 SRR1 0000000000000000 PVR 0000000000800200 VRSAVE 0000000000000000 SPRG0 0000000000000000 SPRG1 0000000000000000 SPRG2 0000000000000000 SPRG3 0000000000000000 SPRG4 0000000000000000 SPRG5 0000000000000000 SPRG6 0000000000000000 SPRG7 0000000000000000 HSRR0 0000000000000000 HSRR1 0000000000000000 CFAR 0000000000000000 LPCR 0000000000020400 PTCR 0000000000000000 DAR 0000000000000000 DSISR 0000000000000000 kernel:trap=0xffffffea | pc=0x100 | msr=0x1000 This patch updates kvmppc_set_arch_compat() to use the host PVR value if 'compat_pvr' == 0 indicating that qemu doesn't want to enforce any specific PVR compat mode. The relevant part of the code might need a rework if PowerVM implements a support for `arch_compat == 0` in nestedv2 API. Fixes: 19d31c5f1157 ("KVM: PPC: Add support for nestedv2 guests") Reviewed-by: "Aneesh Kumar K.V (IBM)" Reviewed-by: Vaibhav Jain Signed-off-by: Amit Machhiwal Signed-off-by: Michael Ellerman Link: https://msgid.link/20240207054526.3720087-1-amachhiw@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 26 ++++++++++++++++++++++++-- arch/powerpc/kvm/book3s_hv_nestedv2.c | 20 ++++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 52427fc2a33fa..0b921704da45e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -391,6 +391,24 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) /* Dummy value used in computing PCR value below */ #define PCR_ARCH_31 (PCR_ARCH_300 << 1) +static inline unsigned long map_pcr_to_cap(unsigned long pcr) +{ + unsigned long cap = 0; + + switch (pcr) { + case PCR_ARCH_300: + cap = H_GUEST_CAP_POWER9; + break; + case PCR_ARCH_31: + cap = H_GUEST_CAP_POWER10; + break; + default: + break; + } + + return cap; +} + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0; @@ -424,11 +442,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) break; case PVR_ARCH_300: guest_pcr_bit = PCR_ARCH_300; - cap = H_GUEST_CAP_POWER9; break; case PVR_ARCH_31: guest_pcr_bit = PCR_ARCH_31; - cap = H_GUEST_CAP_POWER10; break; default: return -EINVAL; @@ -440,6 +456,12 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) return -EINVAL; if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) { + /* + * 'arch_compat == 0' would mean the guest should default to + * L1's compatibility. In this case, the guest would pick + * host's PCR and evaluate the corresponding capabilities. + */ + cap = map_pcr_to_cap(guest_pcr_bit); if (!(cap & nested_capabilities)) return -EINVAL; } diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index 5378eb40b162f..8e6f5355f08b5 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -138,6 +138,7 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, vector128 v; int rc, i; u16 iden; + u32 arch_compat = 0; vcpu = gsm->data; @@ -347,8 +348,23 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, break; } case KVMPPC_GSID_LOGICAL_PVR: - rc = kvmppc_gse_put_u32(gsb, iden, - vcpu->arch.vcore->arch_compat); + /* + * Though 'arch_compat == 0' would mean the default + * compatibility, arch_compat, being a Guest Wide + * Element, cannot be filled with a value of 0 in GSB + * as this would result into a kernel trap. + * Hence, when `arch_compat == 0`, arch_compat should + * default to L1's PVR. + */ + if (!vcpu->arch.vcore->arch_compat) { + if (cpu_has_feature(CPU_FTR_ARCH_31)) + arch_compat = PVR_ARCH_31; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) + arch_compat = PVR_ARCH_300; + } else { + arch_compat = vcpu->arch.vcore->arch_compat; + } + rc = kvmppc_gse_put_u32(gsb, iden, arch_compat); break; } -- GitLab From ae366ba8576da0135d7d3db2dfa6304f3338d0c2 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Mon, 19 Feb 2024 18:25:13 +0100 Subject: [PATCH 1254/1405] gpiolib: Handle no pin_ranges in gpiochip_generic_config() Similar to gpiochip_generic_request() and gpiochip_generic_free() the gpiochip_generic_config() function needs to handle the case where there are no pinctrl pins mapped to the GPIOs, usually through the gpio-ranges device tree property. Commit f34fd6ee1be8 ("gpio: dwapb: Use generic request, free and set_config") set the .set_config callback to gpiochip_generic_config() in the dwapb GPIO driver so the GPIO API can set pinctrl configuration for the corresponding pins. Most boards using the dwapb driver do not set the gpio-ranges device tree property though, and in this case gpiochip_generic_config() would return -EPROPE_DEFER rather than the previous -ENOTSUPP return value. This in turn makes gpio_set_config_with_argument_optional() fail and propagate the error to any driver requesting GPIOs. Fixes: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips") Reported-by: Jisheng Zhang Closes: https://lore.kernel.org/linux-gpio/ZdC_g3U4l0CJIWzh@xhacker/ Tested-by: Jisheng Zhang Signed-off-by: Emil Renner Berthing Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 8b3a0f45b5745..e434e8cc1229c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2042,6 +2042,11 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free); int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { +#ifdef CONFIG_PINCTRL + if (list_empty(&gc->gpiodev->pin_ranges)) + return -ENOTSUPP; +#endif + return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); -- GitLab From 802379b8f9e169293e9ba7089e5f1a6340e2e7a3 Mon Sep 17 00:00:00 2001 From: Hojin Nam Date: Fri, 16 Feb 2024 10:45:22 +0900 Subject: [PATCH 1255/1405] perf: CXL: fix CPMU filter value mask length CPMU filter value is described as 4B length in CXL r3.0 8.2.7.2.2. However, it is used as 2B length in code and comments. Reviewed-by: Jonathan Cameron Signed-off-by: Hojin Nam Link: https://lore.kernel.org/r/20240216014522.32321-1-hj96.nam@samsung.com Signed-off-by: Will Deacon --- drivers/perf/cxl_pmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index bc0d414a6aff9..308c9969642e1 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -59,7 +59,7 @@ #define CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK GENMASK_ULL(63, 59) #define CXL_PMU_FILTER_CFG_REG(n, f) (0x400 + 4 * ((f) + (n) * 8)) -#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(15, 0) +#define CXL_PMU_FILTER_CFG_VALUE_MSK GENMASK(31, 0) #define CXL_PMU_COUNTER_REG(n) (0xc00 + 8 * (n)) @@ -314,9 +314,9 @@ static bool cxl_pmu_config1_get_edge(struct perf_event *event) } /* - * CPMU specification allows for 8 filters, each with a 16 bit value... - * So we need to find 8x16bits to store it in. - * As the value used for disable is 0xffff, a separate enable switch + * CPMU specification allows for 8 filters, each with a 32 bit value... + * So we need to find 8x32bits to store it in. + * As the value used for disable is 0xffff_ffff, a separate enable switch * is needed. */ @@ -642,7 +642,7 @@ static void cxl_pmu_event_start(struct perf_event *event, int flags) if (cxl_pmu_config1_hdm_filter_en(event)) cfg = cxl_pmu_config2_get_hdm_decoder(event); else - cfg = GENMASK(15, 0); /* No filtering if 0xFFFF_FFFF */ + cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */ writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0)); } -- GitLab From a6b3eb304a82c29665a0ab947cfe276f6d29f523 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Feb 2024 12:05:47 +0000 Subject: [PATCH 1256/1405] Revert "arm64: jump_label: use constraints "Si" instead of "i"" This reverts commit f9daab0ad01cf9d165dbbbf106ca4e61d06e7fe8. Geert reports that his particular GCC 5.5 vintage toolchain fails to build an arm64 defconfig because of this change: | arch/arm64/include/asm/jump_label.h:25:2: error: invalid 'asm': | invalid operand | asm goto( ^ Aopparently, this is something we claim to support, so let's revert back to the old jump label constraint for now while discussions about raising the minimum GCC version are ongoing. Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/CAMuHMdX+6fnAf8Hm6EqYJPAjrrLO9T7c=Gu3S8V_pqjSDowJ6g@mail.gmail.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/jump_label.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index b7716b215f91a..48ddc0f45d228 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,10 +15,6 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -/* - * Prefer the constraint "S" to support PIC with GCC. Clang before 19 does not - * support "S" on a symbol with a constant offset, so we use "i" as a fallback. - */ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { @@ -27,9 +23,9 @@ static __always_inline bool arch_static_branch(struct static_key * const key, " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad (%[key] - .) + %[bit0] \n\t" + " .quad %c0 - . \n\t" " .popsection \n\t" - : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: @@ -44,9 +40,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key * const ke " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad (%[key] - .) + %[bit0] \n\t" + " .quad %c0 - . \n\t" " .popsection \n\t" - : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: -- GitLab From 9533864816fb4a6207c63b7a98396351ce1a9fae Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:32 +0000 Subject: [PATCH 1257/1405] arm64/sme: Restore SME registers on exit from suspend The fields in SMCR_EL1 and SMPRI_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. The vector length will be restored as part of restoring the SME state for the next SME using task. Fixes: a1f4ccd25cc2 ("arm64/sme: Provide Kconfig for SME") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-1-17e05e493471@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 ++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 481d94416d696..b67b89c54e1c8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -386,6 +386,7 @@ extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); +extern void sme_suspend_exit(void); /* * Return how many bytes of memory are required to store the full SME @@ -421,6 +422,7 @@ static inline int sme_max_vl(void) { return 0; } static inline int sme_max_virtualisable_vl(void) { return 0; } static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; } static inline int sme_get_current_vl(void) { return -EINVAL; } +static inline void sme_suspend_exit(void) { } static inline size_t sme_state_size(struct task_struct const *task) { diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 25ceaee6b025d..d52592088afae 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1311,6 +1311,20 @@ void __init sme_setup(void) get_sme_default_vl()); } +void sme_suspend_exit(void) +{ + u64 smcr = 0; + + if (!system_supports_sme()) + return; + + if (system_supports_fa64()) + smcr |= SMCR_ELx_FA64; + + write_sysreg_s(smcr, SYS_SMCR_EL1); + write_sysreg_s(0, SYS_SMPRI_EL1); +} + #endif /* CONFIG_ARM64_SME */ static void sve_init_regs(void) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index eca4d04352118..eaaff94329cdd 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,8 @@ void notrace __cpu_suspend_exit(void) */ spectre_v4_enable_mitigation(NULL); + sme_suspend_exit(); + /* Restore additional feature-specific configuration */ ptrauth_suspend_exit(); } -- GitLab From d7b77a0d565b048cb0808fa8a4fb031352b22a01 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 Feb 2024 23:06:33 +0000 Subject: [PATCH 1258/1405] arm64/sme: Restore SMCR_EL1.EZT0 on exit from suspend The fields in SMCR_EL1 reset to an architecturally UNKNOWN value. Since we do not otherwise manage the traps configured in this register at runtime we need to reconfigure them after a suspend in case nothing else was kind enough to preserve them for us. Do so for SMCR_EL1.EZT0. Fixes: d4913eee152d ("arm64/sme: Add basic enumeration for SME2") Reported-by: Jackson Cooper-Driver Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240213-arm64-sme-resume-v3-2-17e05e493471@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d52592088afae..f27acca550d55 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1320,6 +1320,8 @@ void sme_suspend_exit(void) if (system_supports_fa64()) smcr |= SMCR_ELx_FA64; + if (system_supports_sme2()) + smcr |= SMCR_ELx_EZT0; write_sysreg_s(smcr, SYS_SMCR_EL1); write_sysreg_s(0, SYS_SMPRI_EL1); -- GitLab From bd8905d70944aae5063fd91c667e6f846ee92718 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:17:18 +0100 Subject: [PATCH 1259/1405] platform/x86: x86-android-tablets: Fix keyboard touchscreen on Lenovo Yogabook1 X90 After commit 4014ae236b1d ("platform/x86: x86-android-tablets: Stop using gpiolib private APIs") the touchscreen in the keyboard half of the Lenovo Yogabook1 X90 stopped working with the following error: Goodix-TS i2c-goodix_ts: error -EBUSY: Failed to get irq GPIO The problem is that when getting the IRQ for instantiated i2c_client-s from a GPIO (rather then using an IRQ directly from the IOAPIC), x86_acpi_irq_helper_get() now properly requests the GPIO, which disallows other drivers from requesting it. Normally this is a good thing, but the goodix touchscreen also uses the IRQ as an output during reset to select which of its 2 possible I2C addresses should be used. Add a new free_gpio flag to struct x86_acpi_irq_data to deal with this and release the GPIO after getting the IRQ in this special case. Fixes: 4014ae236b1d ("platform/x86: x86-android-tablets: Stop using gpiolib private APIs") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240216201721.239791-2-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets/core.c | 3 +++ drivers/platform/x86/x86-android-tablets/lenovo.c | 1 + drivers/platform/x86/x86-android-tablets/x86-android-tablets.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c index f8221a15575b3..f6547c9d75843 100644 --- a/drivers/platform/x86/x86-android-tablets/core.c +++ b/drivers/platform/x86/x86-android-tablets/core.c @@ -113,6 +113,9 @@ int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data) if (irq_type != IRQ_TYPE_NONE && irq_type != irq_get_trigger_type(irq)) irq_set_irq_type(irq, irq_type); + if (data->free_gpio) + devm_gpiod_put(&x86_android_tablet_device->dev, gpiod); + return irq; case X86_ACPI_IRQ_TYPE_PMIC: status = acpi_get_handle(NULL, data->chip, &handle); diff --git a/drivers/platform/x86/x86-android-tablets/lenovo.c b/drivers/platform/x86/x86-android-tablets/lenovo.c index f1c66a61bfc52..c297391955adb 100644 --- a/drivers/platform/x86/x86-android-tablets/lenovo.c +++ b/drivers/platform/x86/x86-android-tablets/lenovo.c @@ -116,6 +116,7 @@ static const struct x86_i2c_client_info lenovo_yb1_x90_i2c_clients[] __initconst .trigger = ACPI_EDGE_SENSITIVE, .polarity = ACPI_ACTIVE_LOW, .con_id = "goodix_ts_irq", + .free_gpio = true, }, }, { /* Wacom Digitizer in keyboard half */ diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h index 49fed9410adba..468993edfeee2 100644 --- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h +++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h @@ -39,6 +39,7 @@ struct x86_acpi_irq_data { int index; int trigger; /* ACPI_EDGE_SENSITIVE / ACPI_LEVEL_SENSITIVE */ int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */ + bool free_gpio; /* Release GPIO after getting IRQ (for TYPE_GPIOINT) */ const char *con_id; }; -- GitLab From dc5afd720f84de3c1f5d700eb0b858006a2dc468 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:17:19 +0100 Subject: [PATCH 1260/1405] platform/x86: Add new get_serdev_controller() helper In some cases UART attached devices which require an in kernel driver, e.g. UART attached Bluetooth HCIs are described in the ACPI tables by an ACPI device with a broken or missing UartSerialBusV2() resource. This causes the kernel to create a /dev/ttyS# char-device for the UART instead of creating an in kernel serdev-controller + serdev-device pair for the in kernel driver. The quirk handling in acpi_quirk_skip_serdev_enumeration() makes the kernel create a serdev-controller device for these UARTs instead of a /dev/ttyS#. Instantiating the actual serdev-device to bind to is up to pdx86 code, so far this was handled by the x86-android-tablets code. But since commit b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") the serdev-controller device has moved in the device hierarchy from (e.g.) /sys/devices/pci0000:00/8086228A:00/serial0 to /sys/devices/pci0000:00/8086228A:00/8086228A:00:0/8086228A:00:0.0/serial0 . This makes this a bit trickier to do and another driver is in the works which will also need this functionality. Add a new helper to get the serdev-controller device, so that the new code for this can be shared. Fixes: b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") Cc: Tony Lindgren Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240216201721.239791-3-hdegoede@redhat.com --- drivers/platform/x86/serdev_helpers.h | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 drivers/platform/x86/serdev_helpers.h diff --git a/drivers/platform/x86/serdev_helpers.h b/drivers/platform/x86/serdev_helpers.h new file mode 100644 index 0000000000000..bcf3a0c356ea1 --- /dev/null +++ b/drivers/platform/x86/serdev_helpers.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * In some cases UART attached devices which require an in kernel driver, + * e.g. UART attached Bluetooth HCIs are described in the ACPI tables + * by an ACPI device with a broken or missing UartSerialBusV2() resource. + * + * This causes the kernel to create a /dev/ttyS# char-device for the UART + * instead of creating an in kernel serdev-controller + serdev-device pair + * for the in kernel driver. + * + * The quirk handling in acpi_quirk_skip_serdev_enumeration() makes the kernel + * create a serdev-controller device for these UARTs instead of a /dev/ttyS#. + * + * Instantiating the actual serdev-device to bind to is up to pdx86 code, + * this header provides a helper for getting the serdev-controller device. + */ +#include +#include +#include +#include +#include +#include + +static inline struct device * +get_serdev_controller(const char *serial_ctrl_hid, + const char *serial_ctrl_uid, + int serial_ctrl_port, + const char *serdev_ctrl_name) +{ + struct device *ctrl_dev, *child; + struct acpi_device *ctrl_adev; + char name[32]; + int i; + + ctrl_adev = acpi_dev_get_first_match_dev(serial_ctrl_hid, serial_ctrl_uid, -1); + if (!ctrl_adev) { + pr_err("error could not get %s/%s serial-ctrl adev\n", + serial_ctrl_hid, serial_ctrl_uid); + return ERR_PTR(-ENODEV); + } + + /* get_first_physical_node() returns a weak ref */ + ctrl_dev = get_device(acpi_get_first_physical_node(ctrl_adev)); + if (!ctrl_dev) { + pr_err("error could not get %s/%s serial-ctrl physical node\n", + serial_ctrl_hid, serial_ctrl_uid); + ctrl_dev = ERR_PTR(-ENODEV); + goto put_ctrl_adev; + } + + /* Walk host -> uart-ctrl -> port -> serdev-ctrl */ + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + snprintf(name, sizeof(name), "%s:0", dev_name(ctrl_dev)); + break; + case 1: + snprintf(name, sizeof(name), "%s.%d", + dev_name(ctrl_dev), serial_ctrl_port); + break; + case 2: + strscpy(name, serdev_ctrl_name, sizeof(name)); + break; + } + + child = device_find_child_by_name(ctrl_dev, name); + put_device(ctrl_dev); + if (!child) { + pr_err("error could not find '%s' device\n", name); + ctrl_dev = ERR_PTR(-ENODEV); + goto put_ctrl_adev; + } + + ctrl_dev = child; + } + +put_ctrl_adev: + acpi_dev_put(ctrl_adev); + return ctrl_dev; +} -- GitLab From 812a79b52b92345d777b6377fa538747d366b6ce Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:17:20 +0100 Subject: [PATCH 1261/1405] platform/x86: x86-android-tablets: Fix serdev instantiation no longer working After commit b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") x86_instantiate_serdev() no longer works due to the serdev-controller-device moving in the device hierarchy from (e.g.) /sys/devices/pci0000:00/8086228A:00/serial0 to /sys/devices/pci0000:00/8086228A:00/8086228A:00:0/8086228A:00:0.0/serial0 Use the new get_serdev_controller() helper function to fix this. Fixes: b286f4e87e32 ("serial: core: Move tty and serdev to be children of serial core port device") Cc: Tony Lindgren Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240216201721.239791-4-hdegoede@redhat.com --- .../platform/x86/x86-android-tablets/core.c | 35 +++++-------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c index f6547c9d75843..a3415f1c0b5f8 100644 --- a/drivers/platform/x86/x86-android-tablets/core.c +++ b/drivers/platform/x86/x86-android-tablets/core.c @@ -21,6 +21,7 @@ #include #include "x86-android-tablets.h" +#include "../serdev_helpers.h" static struct platform_device *x86_android_tablet_device; @@ -232,38 +233,20 @@ static __init int x86_instantiate_spi_dev(const struct x86_dev_info *dev_info, i static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int idx) { - struct acpi_device *ctrl_adev, *serdev_adev; + struct acpi_device *serdev_adev; struct serdev_device *serdev; struct device *ctrl_dev; int ret = -ENODEV; - ctrl_adev = acpi_dev_get_first_match_dev(info->ctrl_hid, info->ctrl_uid, -1); - if (!ctrl_adev) { - pr_err("error could not get %s/%s ctrl adev\n", - info->ctrl_hid, info->ctrl_uid); - return -ENODEV; - } + ctrl_dev = get_serdev_controller(info->ctrl_hid, info->ctrl_uid, 0, + info->ctrl_devname); + if (IS_ERR(ctrl_dev)) + return PTR_ERR(ctrl_dev); serdev_adev = acpi_dev_get_first_match_dev(info->serdev_hid, NULL, -1); if (!serdev_adev) { pr_err("error could not get %s serdev adev\n", info->serdev_hid); - goto put_ctrl_adev; - } - - /* get_first_physical_node() returns a weak ref, no need to put() it */ - ctrl_dev = acpi_get_first_physical_node(ctrl_adev); - if (!ctrl_dev) { - pr_err("error could not get %s/%s ctrl physical dev\n", - info->ctrl_hid, info->ctrl_uid); - goto put_serdev_adev; - } - - /* ctrl_dev now points to the controller's parent, get the controller */ - ctrl_dev = device_find_child_by_name(ctrl_dev, info->ctrl_devname); - if (!ctrl_dev) { - pr_err("error could not get %s/%s %s ctrl dev\n", - info->ctrl_hid, info->ctrl_uid, info->ctrl_devname); - goto put_serdev_adev; + goto put_ctrl_dev; } serdev = serdev_device_alloc(to_serdev_controller(ctrl_dev)); @@ -286,8 +269,8 @@ static __init int x86_instantiate_serdev(const struct x86_serdev_info *info, int put_serdev_adev: acpi_dev_put(serdev_adev); -put_ctrl_adev: - acpi_dev_put(ctrl_adev); +put_ctrl_dev: + put_device(ctrl_dev); return ret; } -- GitLab From 8215ca518164d35f10c0b5545c8bb80f538638b8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:17:21 +0100 Subject: [PATCH 1262/1405] platform/x86: x86-android-tablets: Fix acer_b1_750_goodix_gpios name The Acer B1 750 tablet used a Novatek NVT-ts touchscreen, not a Goodix touchscreen. Rename acer_b1_750_goodix_gpios to acer_b1_750_nvt_ts_gpios to correctly reflect this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240216201721.239791-5-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets/other.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets/other.c b/drivers/platform/x86/x86-android-tablets/other.c index bc6bbf7ec6ea1..278402dcb808c 100644 --- a/drivers/platform/x86/x86-android-tablets/other.c +++ b/drivers/platform/x86/x86-android-tablets/other.c @@ -68,7 +68,7 @@ static const struct x86_i2c_client_info acer_b1_750_i2c_clients[] __initconst = }, }; -static struct gpiod_lookup_table acer_b1_750_goodix_gpios = { +static struct gpiod_lookup_table acer_b1_750_nvt_ts_gpios = { .dev_id = "i2c-NVT-ts", .table = { GPIO_LOOKUP("INT33FC:01", 26, "reset", GPIO_ACTIVE_LOW), @@ -77,7 +77,7 @@ static struct gpiod_lookup_table acer_b1_750_goodix_gpios = { }; static struct gpiod_lookup_table * const acer_b1_750_gpios[] = { - &acer_b1_750_goodix_gpios, + &acer_b1_750_nvt_ts_gpios, &int3496_reference_gpios, NULL }; -- GitLab From 84c16d01ff219bc0a5dca5219db6b8b86a6854fb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 16 Feb 2024 21:33:00 +0100 Subject: [PATCH 1263/1405] platform/x86: intel-vbtn: Stop calling "VBDL" from notify_handler Commit 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") causes 2 issues on the ThinkPad X1 Tablet Gen2: 1. The ThinkPad will wake up immediately from suspend 2. When put in tablet mode SW_TABLET_MODE reverts to 0 after about 1 second Both these issues are caused by the "VBDL" ACPI method call added at the end of the notify_handler. And it never became entirely clear if this call is even necessary to fix the issue of missing tablet-mode-switch events on the Dell Inspiron 7352. Drop the "VBDL" ACPI method call again to fix the 2 issues this is causing on the ThinkPad X1 Tablet Gen2. Fixes: 14c200b7ca46 ("platform/x86: intel-vbtn: Fix missing tablet-mode-switch events") Reported-by: Alexander Kobel Closes: https://lore.kernel.org/platform-driver-x86/295984ce-bd4b-49bd-adc5-ffe7c898d7f0@a-kobel.de/ Cc: regressions@lists.linux.dev Cc: Arnold Gozum Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Tested-by: Alexander Kobel Link: https://lore.kernel.org/r/20240216203300.245826-1-hdegoede@redhat.com --- drivers/platform/x86/intel/vbtn.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 210b0a81b7ecb..084c355c86f5f 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -200,9 +200,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE); sparse_keymap_report_event(input_dev, event, val, autorelease); - - /* Some devices need this to report further events */ - acpi_evaluate_object(handle, "VBDL", NULL, NULL); } /* -- GitLab From 427c70dec738318b7f71e1b9d829ff0e9771d493 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 16 Feb 2024 20:23:11 -0600 Subject: [PATCH 1264/1405] platform/x86: thinkpad_acpi: Only update profile if successfully converted Randomly a Lenovo Z13 will trigger a kernel warning traceback from this condition: ``` if (WARN_ON((profile < 0) || (profile >= ARRAY_SIZE(profile_names)))) ``` This happens because thinkpad-acpi always assumes that convert_dytc_to_profile() successfully updated the profile. On the contrary a condition can occur that when dytc_profile_refresh() is called the profile doesn't get updated as there is a -EOPNOTSUPP branch. Catch this situation and avoid updating the profile. Also log this into dynamic debugging in case any other modes should be added in the future. Fixes: c3bfcd4c6762 ("platform/x86: thinkpad_acpi: Add platform profile support") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20240217022311.113879-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c4895e9bc7148..5ecd9d33250d7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10308,6 +10308,7 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, return 0; default: /* Unknown function */ + pr_debug("unknown function 0x%x\n", funcmode); return -EOPNOTSUPP; } return 0; @@ -10493,8 +10494,8 @@ static void dytc_profile_refresh(void) return; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(funcmode, perfmode, &profile); - if (profile != dytc_current_profile) { + err = convert_dytc_to_profile(funcmode, perfmode, &profile); + if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); } -- GitLab From eb0d253ff9c74dee30aa92fe460b825eb28acd73 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Tue, 20 Feb 2024 14:16:24 +0100 Subject: [PATCH 1265/1405] accel/ivpu: Don't enable any tiles by default on VPU40xx There is no point in requesting 1 tile on VPU40xx as the FW will probably need more tiles to run workloads, so it will have to reconfigure PLL anyway. Don't enable any tiles and allow the FW to perform initial tile configuration. This improves NPU boot stability as the tiles are always enabled only by the FW from the same initial state. Fixes: 79cdc56c4a54 ("accel/ivpu: Add initial support for VPU 4") Cc: stable@vger.kernel.org Signed-off-by: Andrzej Kacprowski Signed-off-by: Jacek Lawrynowicz Reviewed-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20240220131624.1447813-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_40xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 1c995307c1138..a1523d0b1ef36 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -24,7 +24,7 @@ #define SKU_HW_ID_SHIFT 16u #define SKU_HW_ID_MASK 0xffff0000u -#define PLL_CONFIG_DEFAULT 0x1 +#define PLL_CONFIG_DEFAULT 0x0 #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_REF_CLK_FREQ (50 * 1000000) -- GitLab From fdf87a0dc26d0550c60edc911cda42f9afec3557 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 5 Feb 2024 11:23:34 +0100 Subject: [PATCH 1266/1405] ARM: ep93xx: Add terminator to gpiod_lookup_table Without the terminator, if a con_id is passed to gpio_find() that does not exist in the lookup table the function will not stop looping correctly, and eventually cause an oops. Cc: stable@vger.kernel.org Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") Reported-by: Andy Shevchenko Signed-off-by: Nikita Shubin Reviewed-by: Linus Walleij Acked-by: Alexander Sverdlin Signed-off-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20240205102337.439002-1-alexander.sverdlin@gmail.com Signed-off-by: Arnd Bergmann --- arch/arm/mach-ep93xx/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 71b1139764204..8b1ec60a9a467 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -339,6 +339,7 @@ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), GPIO_LOOKUP_IDX("G", 0, NULL, 1, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), + { } }, }; -- GitLab From 944d5fe50f3f03daacfea16300e656a1691c4a23 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Feb 2024 15:25:12 +0000 Subject: [PATCH 1267/1405] sched/membarrier: reduce the ability to hammer on sys_membarrier On some systems, sys_membarrier can be very expensive, causing overall slowdowns for everything. So put a lock on the path in order to serialize the accesses to prevent the ability for this to be called at too high of a frequency and saturate the machine. Signed-off-by: Greg Kroah-Hartman Reviewed-and-tested-by: Mathieu Desnoyers Acked-by: Borislav Petkov Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command") Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command") Signed-off-by: Linus Torvalds --- kernel/sched/membarrier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 2ad881d07752c..4e715b9b278e7 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -162,6 +162,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \ | MEMBARRIER_CMD_GET_REGISTRATIONS) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -259,6 +262,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -347,6 +351,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -460,6 +465,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { -- GitLab From c88f5e553fe38b2ffc4c33d08654e5281b297677 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:27:39 +0100 Subject: [PATCH 1268/1405] dm-integrity: recheck the integrity tag after a failure If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-integrity reports an error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-integrity, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 93 +++++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c5f03aab45525..e46c798c5c364 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -278,6 +278,8 @@ struct dm_integrity_c { atomic64_t number_of_mismatches; + mempool_t recheck_pool; + struct notifier_block reboot_notifier; }; @@ -1689,6 +1691,79 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector get_random_bytes(result, ic->tag_size); } +static void integrity_recheck(struct dm_integrity_io *dio) +{ + struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); + struct dm_integrity_c *ic = dio->ic; + struct bvec_iter iter; + struct bio_vec bv; + sector_t sector, logical_sector, area, offset; + char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; + struct page *page; + void *buffer; + + get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); + dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, + &dio->metadata_offset); + sector = get_data_sector(ic, area, offset); + logical_sector = dio->range.logical_sector; + + page = mempool_alloc(&ic->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { + unsigned pos = 0; + + do { + char *mem; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = ic->io; + io_loc.bdev = ic->dev->bdev; + io_loc.sector = sector; + io_loc.count = ic->sectors_per_block; + + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) { + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + integrity_sector_checksum(ic, logical_sector, buffer, + checksum_onstack); + r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + &dio->metadata_offset, ic->tag_size, TAG_CMP); + if (r) { + if (r > 0) { + DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", + bio->bi_bdev, logical_sector); + atomic64_inc(&ic->number_of_mismatches); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", + bio, logical_sector, 0); + r = -EILSEQ; + } + dio->bi_status = errno_to_blk_status(r); + goto free_ret; + } + + mem = bvec_kmap_local(&bv); + memcpy(mem + pos, buffer, ic->sectors_per_block << SECTOR_SHIFT); + kunmap_local(mem); + + pos += ic->sectors_per_block << SECTOR_SHIFT; + sector += ic->sectors_per_block; + logical_sector += ic->sectors_per_block; + } while (pos < bv.bv_len); + } +free_ret: + mempool_free(page, &ic->recheck_pool); +} + static void integrity_metadata(struct work_struct *w) { struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work); @@ -1776,15 +1851,8 @@ static void integrity_metadata(struct work_struct *w) checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - sector_t s; - - s = sector - ((r + ic->tag_size - 1) / ic->tag_size); - DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx", - bio->bi_bdev, s); - r = -EILSEQ; - atomic64_inc(&ic->number_of_mismatches); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-checksum", - bio, s, 0); + integrity_recheck(dio); + goto skip_io; } if (likely(checksums != checksums_onstack)) kfree(checksums); @@ -4261,6 +4329,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv goto bad; } + r = mempool_init_page_pool(&ic->recheck_pool, 1, 0); + if (r) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + ic->metadata_wq = alloc_workqueue("dm-integrity-metadata", WQ_MEM_RECLAIM, METADATA_WORKQUEUE_MAX_ACTIVE); if (!ic->metadata_wq) { @@ -4609,6 +4683,7 @@ static void dm_integrity_dtr(struct dm_target *ti) kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); + mempool_exit(&ic->recheck_pool); mempool_exit(&ic->journal_io_mempool); if (ic->io) dm_io_client_destroy(ic->io); -- GitLab From 9177f3c0dea6143d05cac1bbd28668fd0e216d11 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:28:09 +0100 Subject: [PATCH 1269/1405] dm-verity: recheck the hash after a failure If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-verity reports an error [1]. This commit fixes dm-verity, so that if hash verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the hash is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 86 ++++++++++++++++++++++++++++++++--- drivers/md/dm-verity.h | 6 +++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 82662f5769c4a..224469e1efbcc 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -482,6 +482,63 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io, return 0; } +static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, + u8 *data, size_t len) +{ + memcpy(data, io->recheck_buffer, len); + io->recheck_buffer += len; + + return 0; +} + +static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) +{ + struct page *page; + void *buffer; + int r; + struct dm_io_request io_req; + struct dm_io_region io_loc; + + page = mempool_alloc(&v->recheck_pool, GFP_NOIO); + buffer = page_to_virt(page); + + io_req.bi_opf = REQ_OP_READ; + io_req.mem.type = DM_IO_KMEM; + io_req.mem.ptr.addr = buffer; + io_req.notify.fn = NULL; + io_req.client = v->io; + io_loc.bdev = v->data_dev->bdev; + io_loc.sector = cur_block << (v->data_dev_block_bits - SECTOR_SHIFT); + io_loc.count = 1 << (v->data_dev_block_bits - SECTOR_SHIFT); + r = dm_io(&io_req, 1, &io_loc, NULL); + if (unlikely(r)) + goto free_ret; + + r = verity_hash(v, verity_io_hash_req(v, io), buffer, + 1 << v->data_dev_block_bits, + verity_io_real_digest(v, io), true); + if (unlikely(r)) + goto free_ret; + + if (memcmp(verity_io_real_digest(v, io), + verity_io_want_digest(v, io), v->digest_size)) { + r = -EIO; + goto free_ret; + } + + io->recheck_buffer = buffer; + r = verity_for_bv_block(v, io, &start, verity_recheck_copy); + if (unlikely(r)) + goto free_ret; + + r = 0; +free_ret: + mempool_free(page, &v->recheck_pool); + + return r; +} + static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io, u8 *data, size_t len) { @@ -508,9 +565,7 @@ static int verity_verify_io(struct dm_verity_io *io) { bool is_zero; struct dm_verity *v = io->v; -#if defined(CONFIG_DM_VERITY_FEC) struct bvec_iter start; -#endif struct bvec_iter iter_copy; struct bvec_iter *iter; struct crypto_wait wait; @@ -561,10 +616,7 @@ static int verity_verify_io(struct dm_verity_io *io) if (unlikely(r < 0)) return r; -#if defined(CONFIG_DM_VERITY_FEC) - if (verity_fec_is_enabled(v)) - start = *iter; -#endif + start = *iter; r = verity_for_io_block(v, io, iter, &wait); if (unlikely(r < 0)) return r; @@ -586,6 +638,10 @@ static int verity_verify_io(struct dm_verity_io *io) * tasklet since it may sleep, so fallback to work-queue. */ return -EAGAIN; + } else if (verity_recheck(v, io, start, cur_block) == 0) { + if (v->validated_blocks) + set_bit(cur_block, v->validated_blocks); + continue; #if defined(CONFIG_DM_VERITY_FEC) } else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) { @@ -941,6 +997,10 @@ static void verity_dtr(struct dm_target *ti) if (v->verify_wq) destroy_workqueue(v->verify_wq); + mempool_exit(&v->recheck_pool); + if (v->io) + dm_io_client_destroy(v->io); + if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -1379,6 +1439,20 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv) } v->hash_blocks = hash_position; + r = mempool_init_page_pool(&v->recheck_pool, 1, 0); + if (unlikely(r)) { + ti->error = "Cannot allocate mempool"; + goto bad; + } + + v->io = dm_io_client_create(); + if (IS_ERR(v->io)) { + r = PTR_ERR(v->io); + v->io = NULL; + ti->error = "Cannot allocate dm io"; + goto bad; + } + v->bufio = dm_bufio_client_create(v->hash_dev->bdev, 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), dm_bufio_alloc_callback, NULL, diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index f3f6070084196..4620a98c99561 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -11,6 +11,7 @@ #ifndef DM_VERITY_H #define DM_VERITY_H +#include #include #include #include @@ -68,6 +69,9 @@ struct dm_verity { unsigned long *validated_blocks; /* bitset blocks validated */ char *signature_key_desc; /* signature keyring reference */ + + struct dm_io_client *io; + mempool_t recheck_pool; }; struct dm_verity_io { @@ -84,6 +88,8 @@ struct dm_verity_io { struct work_struct work; + char *recheck_buffer; + /* * Three variably-size fields follow this struct: * -- GitLab From 50c70240097ce41fe6bce6478b80478281e4d0f7 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:30:10 +0100 Subject: [PATCH 1270/1405] dm-crypt: don't modify the data when using authenticated encryption It was said that authenticated encryption could produce invalid tag when the data that is being encrypted is modified [1]. So, fix this problem by copying the data into the clone bio first and then encrypt them inside the clone bio. This may reduce performance, but it is needed to prevent the user from corrupting the device by writing data with O_DIRECT and modifying them at the same time. [1] https://lore.kernel.org/all/20240207004723.GA35324@sol.localdomain/T/ Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f745f85082434..14c5be6eda3b6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2071,6 +2071,12 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) io->ctx.bio_out = clone; io->ctx.iter_out = clone->bi_iter; + if (crypt_integrity_aead(cc)) { + bio_copy_data(clone, io->base_bio); + io->ctx.bio_in = clone; + io->ctx.iter_in = clone->bi_iter; + } + sector += bio_sectors(clone); crypt_inc_pending(io); -- GitLab From 42e15d12070b4ff9af2b980f1b65774c2dab0507 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 19 Feb 2024 21:31:11 +0100 Subject: [PATCH 1271/1405] dm-crypt: recheck the integrity tag after a failure If a userspace process reads (with O_DIRECT) multiple blocks into the same buffer, dm-crypt reports an authentication error [1]. The error is reported in a log and it may cause RAID leg being kicked out of the array. This commit fixes dm-crypt, so that if integrity verification fails, the data is read again into a kernel buffer (where userspace can't modify it) and the integrity tag is rechecked. If the recheck succeeds, the content of the kernel buffer is copied into the user buffer; if the recheck fails, an integrity error is reported. [1] https://people.redhat.com/~mpatocka/testcases/blk-auth-modify/read2.c Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 89 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 14c5be6eda3b6..6044614bab761 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -62,6 +62,8 @@ struct convert_context { struct skcipher_request *req; struct aead_request *req_aead; } r; + bool aead_recheck; + bool aead_failed; }; @@ -82,6 +84,8 @@ struct dm_crypt_io { blk_status_t error; sector_t sector; + struct bvec_iter saved_bi_iter; + struct rb_node rb_node; } CRYPTO_MINALIGN_ATTR; @@ -1370,10 +1374,13 @@ static int crypt_convert_block_aead(struct crypt_config *cc, if (r == -EBADMSG) { sector_t s = le64_to_cpu(*sector); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } } if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) @@ -1757,6 +1764,8 @@ static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, io->base_bio = bio; io->sector = sector; io->error = 0; + io->ctx.aead_recheck = false; + io->ctx.aead_failed = false; io->ctx.r.req = NULL; io->integrity_metadata = NULL; io->integrity_metadata_from_pool = false; @@ -1768,6 +1777,8 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_queue_read(struct dm_crypt_io *io); + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1781,6 +1792,15 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (likely(!io->ctx.aead_recheck) && unlikely(io->ctx.aead_failed) && + cc->on_disk_tag_size && bio_data_dir(base_bio) == READ) { + io->ctx.aead_recheck = true; + io->ctx.aead_failed = false; + io->error = 0; + kcryptd_queue_read(io); + return; + } + if (io->ctx.r.req) crypt_free_req(cc, io->ctx.r.req, base_bio); @@ -1816,15 +1836,19 @@ static void crypt_endio(struct bio *clone) struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned int rw = bio_data_dir(clone); - blk_status_t error; + blk_status_t error = clone->bi_status; + + if (io->ctx.aead_recheck && !error) { + kcryptd_queue_crypt(io); + return; + } /* * free the processed pages */ - if (rw == WRITE) + if (rw == WRITE || io->ctx.aead_recheck) crypt_free_buffer_pages(cc, clone); - error = clone->bi_status; bio_put(clone); if (rw == READ && !error) { @@ -1845,6 +1869,22 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) struct crypt_config *cc = io->cc; struct bio *clone; + if (io->ctx.aead_recheck) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return 1; + crypt_inc_pending(io); + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + crypt_dec_pending(io); + return 1; + } + clone->bi_iter.bi_sector = cc->start + io->sector; + crypt_convert_init(cc, &io->ctx, clone, clone, io->sector); + io->saved_bi_iter = clone->bi_iter; + dm_submit_bio_remap(io->base_bio, clone); + return 0; + } + /* * We need the original biovec array in order to decrypt the whole bio * data *afterwards* -- thanks to immutable biovecs we don't need to @@ -2113,6 +2153,14 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { + if (io->ctx.aead_recheck) { + if (!io->error) { + io->ctx.bio_in->bi_iter = io->saved_bi_iter; + bio_copy_data(io->base_bio, io->ctx.bio_in); + } + crypt_free_buffer_pages(io->cc, io->ctx.bio_in); + bio_put(io->ctx.bio_in); + } crypt_dec_pending(io); } @@ -2142,11 +2190,17 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); + if (io->ctx.aead_recheck) { + io->ctx.cc_sector = io->sector + cc->iv_offset; + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } else { + crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, + io->sector); - r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + r = crypt_convert(cc, &io->ctx, + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + } /* * Crypto API backlogged the request, because its queue was full * and we're in softirq context, so continue from a workqueue @@ -2188,10 +2242,13 @@ static void kcryptd_async_done(void *data, int error) if (error == -EBADMSG) { sector_t s = le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)); - DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", - ctx->bio_in->bi_bdev, s); - dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", - ctx->bio_in, s, 0); + ctx->aead_failed = true; + if (ctx->aead_recheck) { + DMERR_LIMIT("%pg: INTEGRITY AEAD ERROR, sector %llu", + ctx->bio_in->bi_bdev, s); + dm_audit_log_bio(DM_MSG_PREFIX, "integrity-aead", + ctx->bio_in, s, 0); + } io->error = BLK_STS_PROTECTION; } else if (error < 0) io->error = BLK_STS_IOERR; @@ -3116,7 +3173,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar sval = strchr(opt_string + strlen("integrity:"), ':') + 1; if (!strcasecmp(sval, "aead")) { set_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags); - } else if (strcasecmp(sval, "none")) { + } else if (strcasecmp(sval, "none")) { ti->error = "Unknown integrity profile"; return -EINVAL; } -- GitLab From 787f1b2800464aa277236a66eb3c279535edd460 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 20 Feb 2024 19:11:51 +0100 Subject: [PATCH 1272/1405] dm-verity, dm-crypt: align "struct bvec_iter" correctly "struct bvec_iter" is defined with the __packed attribute, so it is aligned on a single byte. On X86 (and on other architectures that support unaligned addresses in hardware), "struct bvec_iter" is accessed using the 8-byte and 4-byte memory instructions, however these instructions are less efficient if they operate on unaligned addresses. (on RISC machines that don't have unaligned access in hardware, GCC generates byte-by-byte accesses that are very inefficient - see [1]) This commit reorders the entries in "struct dm_verity_io" and "struct convert_context", so that "struct bvec_iter" is aligned on 8 bytes. [1] https://lore.kernel.org/all/ZcLuWUNRZadJr0tQ@fedora/T/ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 4 ++-- drivers/md/dm-verity.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6044614bab761..f54cd6714af3b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -53,11 +53,11 @@ struct convert_context { struct completion restart; struct bio *bio_in; - struct bio *bio_out; struct bvec_iter iter_in; + struct bio *bio_out; struct bvec_iter iter_out; - u64 cc_sector; atomic_t cc_pending; + u64 cc_sector; union { struct skcipher_request *req; struct aead_request *req_aead; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 4620a98c99561..db93a91169d5e 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -80,12 +80,12 @@ struct dm_verity_io { /* original value of bio->bi_end_io */ bio_end_io_t *orig_bi_end_io; + struct bvec_iter iter; + sector_t block; unsigned int n_blocks; bool in_tasklet; - struct bvec_iter iter; - struct work_struct work; char *recheck_buffer; -- GitLab From 0e0c50e85a364bb7f1c52c84affe7f9e88c57da7 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 20 Feb 2024 13:32:52 -0500 Subject: [PATCH 1273/1405] dm-crypt, dm-integrity, dm-verity: bump target version Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-integrity.c | 2 +- drivers/md/dm-verity-target.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f54cd6714af3b..59445763e55a6 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3702,7 +3702,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 24, 0}, + .version = {1, 25, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e46c798c5c364..44a1dcc06dd73 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4736,7 +4736,7 @@ static void dm_integrity_dtr(struct dm_target *ti) static struct target_type integrity_target = { .name = "integrity", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, .ctr = dm_integrity_ctr, diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 224469e1efbcc..06c8b637849c9 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -1560,7 +1560,7 @@ int dm_verity_get_root_digest(struct dm_target *ti, u8 **root_digest, unsigned i static struct target_type verity_target = { .name = "verity", .features = DM_TARGET_IMMUTABLE, - .version = {1, 9, 0}, + .version = {1, 10, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, -- GitLab From fca7526b7d8910c6125cb1ebc3e78ccd5f50ec52 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Feb 2024 12:16:47 -0800 Subject: [PATCH 1274/1405] drm/tests/drm_buddy: fix build failure on 32-bit targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guenter Roeck reports that commit a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") causes build failures on 32-bit targets: "This patch breaks the build on all 32-bit systems since it introduces an unhandled direct 64-bit divide operation. ERROR: modpost: "__umoddi3" [drivers/gpu/drm/tests/drm_buddy_test.ko] undefined! ERROR: modpost: "__moddi3" [drivers/gpu/drm/tests/drm_buddy_test.ko] undefined!" and the uses of 'u64' are all entirely pointless. Yes, the arguments to drm_buddy_init() and drm_buddy_alloc_blocks() are in fact of type 'u64', but none of the values here are remotely relevant, and the compiler will happily just do the type expansion. Of course, in a perfect world the compiler would also have just noticed that all the values in question are tiny, and range analysis would have shown that doing a 64-bit divide is pointless, but that is admittedly expecting a fair amount of the compiler. IOW, we shouldn't write code that the compiler then has to notice is unnecessarily complicated just to avoid extra work. We do have fairly high expectations of compilers, but kernel code should be reasonable to begin with. It turns out that there are also other issues with this code: the KUnit assertion messages have incorrect types in the format strings, but that's a widely spread issue caused by the KUnit infrastructure not having enabled format string verification. We'll get that sorted out separately. Reported-by: Guenter Roeck Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") Link: https://lore.kernel.org/all/538327ff-8d34-41d5-a9ae-1a334744f5ae@roeck-us.net/ Cc: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Signed-off-by: Linus Torvalds --- drivers/gpu/drm/tests/drm_buddy_test.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index fee6bec757d1a..8a464f7f4c61d 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -21,7 +21,8 @@ static inline u64 get_size(int order, u64 chunk_size) static void drm_test_buddy_alloc_contiguous(struct kunit *test) { - u64 mm_size, ps = SZ_4K, i, n_pages, total; + const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; + unsigned long i, n_pages, total; struct drm_buddy_block *block; struct drm_buddy mm; LIST_HEAD(left); @@ -29,8 +30,6 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) LIST_HEAD(right); LIST_HEAD(allocated); - mm_size = 16 * 3 * SZ_4K; - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); /* -- GitLab From e08f65491c0e467127310b13333dd61e89c14bc1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:25 -0600 Subject: [PATCH 1275/1405] arm64: dts: freescale: Disable interrupt_map check Several Freescale Layerscape platforms extirq binding use a malformed interrupt-map property missing parent address cells. These are documented in of_irq_imap_abusers list in drivers/of/irq.c. In order to enable dtc interrupt_map check tree wide, we need to disable it for these platforms which will not be fixed (as that would break compatibility). Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-1-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/freescale/Makefile | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/Makefile b/arch/arm64/boot/dts/freescale/Makefile index 2e027675d7bbe..2cb0212b63c6e 100644 --- a/arch/arm64/boot/dts/freescale/Makefile +++ b/arch/arm64/boot/dts/freescale/Makefile @@ -20,23 +20,41 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-frwy.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-qds.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-rdb.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1046a-tqmls1046a-mbls10xxa.dtb +DTC_FLAGS_fsl-ls1088a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-qds.dtb +DTC_FLAGS_fsl-ls1088a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-rdb.dtb +DTC_FLAGS_fsl-ls1088a-ten64 := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-ten64.dtb +DTC_FLAGS_fsl-ls1088a-tqmls1088a-mbls10xxa := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1088a-tqmls1088a-mbls10xxa.dtb +DTC_FLAGS_fsl-ls2080a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-qds.dtb +DTC_FLAGS_fsl-ls2080a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-rdb.dtb +DTC_FLAGS_fsl-ls2081a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2081a-rdb.dtb +DTC_FLAGS_fsl-ls2080a-simu := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2080a-simu.dtb +DTC_FLAGS_fsl-ls2088a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-qds.dtb +DTC_FLAGS_fsl-ls2088a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls2088a-rdb.dtb +DTC_FLAGS_fsl-lx2160a-bluebox3 := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3.dtb +DTC_FLAGS_fsl-lx2160a-bluebox3-rev-a := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-bluebox3-rev-a.dtb +DTC_FLAGS_fsl-lx2160a-clearfog-cx := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-clearfog-cx.dtb +DTC_FLAGS_fsl-lx2160a-honeycomb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-honeycomb.dtb +DTC_FLAGS_fsl-lx2160a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-qds.dtb +DTC_FLAGS_fsl-lx2160a-rdb := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2160a-rdb.dtb +DTC_FLAGS_fsl-lx2162a-clearfog := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-clearfog.dtb +DTC_FLAGS_fsl-lx2162a-qds := -Wno-interrupt_map dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-lx2162a-qds.dtb fsl-ls1028a-qds-13bb-dtbs := fsl-ls1028a-qds.dtb fsl-ls1028a-qds-13bb.dtbo @@ -53,6 +71,7 @@ dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-85bb.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-899b.dtb dtb-$(CONFIG_ARCH_LAYERSCAPE) += fsl-ls1028a-qds-9999.dtb +DTC_FLAGS_fsl-lx2160a-tqmlx2160a-mblx2160a := -Wno-interrupt_map fsl-lx2160a-tqmlx2160a-mblx2160a-12-11-x-dtbs := fsl-lx2160a-tqmlx2160a-mblx2160a.dtb \ fsl-lx2160a-tqmlx2160a-mblx2160a_12_x_x.dtbo \ fsl-lx2160a-tqmlx2160a-mblx2160a_x_11_x.dtbo -- GitLab From 96fd598e9c34cfa68402a4da3020c9236cfacf35 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:26 -0600 Subject: [PATCH 1276/1405] arm: dts: Fix dtc interrupt_provider warnings The dtc interrupt_provider warning is off by default. Fix all the warnings so it can be enabled. Signed-off-by: Rob Herring Reviewed-by: Andrew Jeffery Reviewed-by: Alexandre Torgue Acked-by: Florian Fainelli #Broadcom Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-2-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/amazon/alpine.dtsi | 1 - arch/arm/boot/dts/aspeed/aspeed-g4.dtsi | 14 -------------- arch/arm/boot/dts/aspeed/aspeed-g5.dtsi | 15 +-------------- arch/arm/boot/dts/aspeed/aspeed-g6.dtsi | 18 ++---------------- arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi | 3 +++ arch/arm/boot/dts/broadcom/bcm-hr2.dtsi | 1 + arch/arm/boot/dts/broadcom/bcm-nsp.dtsi | 2 ++ arch/arm/boot/dts/marvell/kirkwood-l-50.dts | 2 ++ arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi | 2 ++ .../boot/dts/nvidia/tegra30-apalis-v1.1.dtsi | 1 - arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi | 1 - arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi | 1 - arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts | 3 --- arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi | 2 +- arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi | 1 - arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi | 1 - arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi | 1 - .../dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi | 1 + .../nxp/imx/imx6qdl-phytec-phycore-som.dtsi | 1 + arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts | 1 + .../boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts | 1 + arch/arm/boot/dts/st/stm32429i-eval.dts | 1 - arch/arm/boot/dts/st/stm32mp157c-dk2.dts | 1 - .../boot/dts/ti/omap/am5729-beagleboneai.dts | 1 - 24 files changed, 18 insertions(+), 58 deletions(-) diff --git a/arch/arm/boot/dts/amazon/alpine.dtsi b/arch/arm/boot/dts/amazon/alpine.dtsi index ff68dfb4eb787..90bd12feac010 100644 --- a/arch/arm/boot/dts/amazon/alpine.dtsi +++ b/arch/arm/boot/dts/amazon/alpine.dtsi @@ -167,7 +167,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <96>; al,msi-num-spis = <64>; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi index 530491ae5eb26..857cb26ed6d7e 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g4.dtsi @@ -466,7 +466,6 @@ i2c0: i2c-bus@40 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x40 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -482,7 +481,6 @@ i2c1: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -498,7 +496,6 @@ i2c2: i2c-bus@c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0xc0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -515,7 +512,6 @@ i2c3: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -532,7 +528,6 @@ i2c4: i2c-bus@140 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x140 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -549,7 +544,6 @@ i2c5: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -566,7 +560,6 @@ i2c6: i2c-bus@1c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x1c0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -583,7 +576,6 @@ i2c7: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -600,7 +592,6 @@ i2c8: i2c-bus@340 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x340 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -617,7 +608,6 @@ i2c9: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -634,7 +624,6 @@ i2c10: i2c-bus@3c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x3c0 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -651,7 +640,6 @@ i2c11: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -668,7 +656,6 @@ i2c12: i2c-bus@440 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x440 0x40>; compatible = "aspeed,ast2400-i2c-bus"; @@ -685,7 +672,6 @@ i2c13: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x40>; compatible = "aspeed,ast2400-i2c-bus"; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi index 04f98d1dbb97c..e6f3cf3c721e5 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g5.dtsi @@ -363,6 +363,7 @@ interrupts = <40>; reg = <0x1e780200 0x0100>; clocks = <&syscon ASPEED_CLK_APB>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -594,7 +595,6 @@ i2c0: i2c-bus@40 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x40 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -610,7 +610,6 @@ i2c1: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -626,7 +625,6 @@ i2c2: i2c-bus@c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0xc0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -643,7 +641,6 @@ i2c3: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -660,7 +657,6 @@ i2c4: i2c-bus@140 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x140 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -677,7 +673,6 @@ i2c5: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -694,7 +689,6 @@ i2c6: i2c-bus@1c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x1c0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -711,7 +705,6 @@ i2c7: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -728,7 +721,6 @@ i2c8: i2c-bus@340 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x340 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -745,7 +737,6 @@ i2c9: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -762,7 +753,6 @@ i2c10: i2c-bus@3c0 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x3c0 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -779,7 +769,6 @@ i2c11: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -796,7 +785,6 @@ i2c12: i2c-bus@440 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x440 0x40>; compatible = "aspeed,ast2500-i2c-bus"; @@ -813,7 +801,6 @@ i2c13: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x40>; compatible = "aspeed,ast2500-i2c-bus"; diff --git a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi index c4d1faade8be3..29f94696d8b18 100644 --- a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi @@ -474,6 +474,7 @@ reg = <0x1e780500 0x100>; interrupts = ; clocks = <&syscon ASPEED_CLK_APB2>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -488,6 +489,7 @@ reg = <0x1e780600 0x100>; interrupts = ; clocks = <&syscon ASPEED_CLK_APB2>; + #interrupt-cells = <2>; interrupt-controller; bus-frequency = <12000000>; pinctrl-names = "default"; @@ -902,7 +904,6 @@ i2c0: i2c-bus@80 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x80 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -917,7 +918,6 @@ i2c1: i2c-bus@100 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x100 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -932,7 +932,6 @@ i2c2: i2c-bus@180 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x180 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -947,7 +946,6 @@ i2c3: i2c-bus@200 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x200 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -962,7 +960,6 @@ i2c4: i2c-bus@280 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x280 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -977,7 +974,6 @@ i2c5: i2c-bus@300 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x300 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -992,7 +988,6 @@ i2c6: i2c-bus@380 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x380 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1007,7 +1002,6 @@ i2c7: i2c-bus@400 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x400 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1022,7 +1016,6 @@ i2c8: i2c-bus@480 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x480 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1037,7 +1030,6 @@ i2c9: i2c-bus@500 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x500 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1052,7 +1044,6 @@ i2c10: i2c-bus@580 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x580 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1067,7 +1058,6 @@ i2c11: i2c-bus@600 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x600 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1082,7 +1072,6 @@ i2c12: i2c-bus@680 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x680 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1097,7 +1086,6 @@ i2c13: i2c-bus@700 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x700 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1112,7 +1100,6 @@ i2c14: i2c-bus@780 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x780 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; @@ -1127,7 +1114,6 @@ i2c15: i2c-bus@800 { #address-cells = <1>; #size-cells = <0>; - #interrupt-cells = <1>; reg = <0x800 0x80>; compatible = "aspeed,ast2600-i2c-bus"; clocks = <&syscon ASPEED_CLK_APB2>; diff --git a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi index f9f79ed825181..07ca0d993c9fd 100644 --- a/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-cygnus.dtsi @@ -167,6 +167,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&mailbox>; interrupts = <0>; }; @@ -247,6 +248,7 @@ gpio-controller; interrupts = ; interrupt-controller; + #interrupt-cells = <2>; }; i2c1: i2c@1800b000 { @@ -518,6 +520,7 @@ gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinctrl 0 42 1>, <&pinctrl 1 44 3>, diff --git a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi index 788a6806191a3..75545b10ef2fa 100644 --- a/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-hr2.dtsi @@ -200,6 +200,7 @@ gpio-controller; ngpios = <4>; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi index 9d20ba3b1ffb1..6a4482c931674 100644 --- a/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm-nsp.dtsi @@ -180,6 +180,7 @@ gpio-controller; ngpios = <32>; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinctrl 0 0 32>; }; @@ -352,6 +353,7 @@ gpio-controller; ngpios = <4>; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts index dffb9f84e67c5..c841eb8e7fb1d 100644 --- a/arch/arm/boot/dts/marvell/kirkwood-l-50.dts +++ b/arch/arm/boot/dts/marvell/kirkwood-l-50.dts @@ -65,6 +65,7 @@ gpio2: gpio-expander@20 { #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; compatible = "semtech,sx1505q"; reg = <0x20>; @@ -79,6 +80,7 @@ gpio3: gpio-expander@21 { #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; compatible = "semtech,sx1505q"; reg = <0x21>; diff --git a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi index fd671c7a1e5d6..6e1f0f164cb4f 100644 --- a/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi +++ b/arch/arm/boot/dts/nuvoton/nuvoton-wpcm450.dtsi @@ -120,6 +120,7 @@ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>, <3 IRQ_TYPE_LEVEL_HIGH>, <4 IRQ_TYPE_LEVEL_HIGH>; + #interrupt-cells = <2>; interrupt-controller; }; @@ -128,6 +129,7 @@ gpio-controller; #gpio-cells = <2>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi index 1640763fd4af2..ff0d684622f74 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-apalis-v1.1.dtsi @@ -997,7 +997,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi index 3b6fad273cabf..d38f1dd38a906 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-apalis.dtsi @@ -980,7 +980,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi index 4eb526fe9c558..81c8a5fd92cce 100644 --- a/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi +++ b/arch/arm/boot/dts/nvidia/tegra30-colibri.dtsi @@ -861,7 +861,6 @@ compatible = "st,stmpe811"; reg = <0x41>; irq-gpio = <&gpio TEGRA_GPIO(V, 0) GPIO_ACTIVE_LOW>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts index db8c332df6a1d..cad112e054758 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-b850v3.dts @@ -227,7 +227,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; bridge@2,1 { compatible = "pci10b5,8605"; @@ -235,7 +234,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; /* Intel Corporation I210 Gigabit Network Connection */ ethernet@3,0 { @@ -250,7 +248,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; /* Intel Corporation I210 Gigabit Network Connection */ switch_nic: ethernet@4,0 { diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi index 99f4f6ac71d4a..c1ae7c47b4422 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6q-bx50v3.dtsi @@ -245,6 +245,7 @@ reg = <0x74>; gpio-controller; #gpio-cells = <2>; + #interrupt-cells = <2>; interrupt-controller; interrupt-parent = <&gpio2>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; @@ -390,7 +391,6 @@ #address-cells = <3>; #size-cells = <2>; - #interrupt-cells = <1>; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi index 2ae93f57fe5ac..ea40623d12e5f 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi @@ -626,7 +626,6 @@ blocks = <0x5>; id = <0>; interrupts = <10 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; interrupt-parent = <&gpio4>; irq-trigger = <0x1>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi index 55c90f6393ad5..d3a7a6eeb8e09 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-colibri.dtsi @@ -550,7 +550,6 @@ blocks = <0x5>; interrupts = <20 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio6>; - interrupt-controller; id = <0>; irq-trigger = <0x1>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi index a63e73adc1fc5..42b2ba23aefc9 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-emcon.dtsi @@ -225,7 +225,6 @@ pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi index 113974520d544..c0c47adc5866e 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-pfla02.dtsi @@ -124,6 +124,7 @@ reg = <0x58>; interrupt-parent = <&gpio2>; interrupts = <9 IRQ_TYPE_LEVEL_LOW>; /* active-low GPIO2_9 */ + #interrupt-cells = <2>; interrupt-controller; regulators { diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi index 86b4269e0e011..85e278eb20161 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-phycore-som.dtsi @@ -100,6 +100,7 @@ interrupt-parent = <&gpio1>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; gpio-controller; #gpio-cells = <2>; diff --git a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts index 12361fcbe24af..1b965652291bf 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7d-pico-dwarf.dts @@ -63,6 +63,7 @@ gpio-controller; #gpio-cells = <2>; #interrupt-cells = <2>; + interrupt-controller; reg = <0x25>; }; diff --git a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts index b0ed68af05467..029f49be40e37 100644 --- a/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/nxp/vf/vf610-zii-dev-rev-b.dts @@ -338,6 +338,7 @@ reg = <0x22>; gpio-controller; #gpio-cells = <2>; + #interrupt-cells = <2>; interrupt-controller; interrupt-parent = <&gpio3>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/boot/dts/st/stm32429i-eval.dts b/arch/arm/boot/dts/st/stm32429i-eval.dts index 576235ec3c516..afa417b34b25f 100644 --- a/arch/arm/boot/dts/st/stm32429i-eval.dts +++ b/arch/arm/boot/dts/st/stm32429i-eval.dts @@ -222,7 +222,6 @@ reg = <0x42>; interrupts = <8 3>; interrupt-parent = <&gpioi>; - interrupt-controller; wakeup-source; stmpegpio: stmpe_gpio { diff --git a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts index 510cca5acb79c..7a701f7ef0c70 100644 --- a/arch/arm/boot/dts/st/stm32mp157c-dk2.dts +++ b/arch/arm/boot/dts/st/stm32mp157c-dk2.dts @@ -64,7 +64,6 @@ reg = <0x38>; interrupts = <2 2>; interrupt-parent = <&gpiof>; - interrupt-controller; touchscreen-size-x = <480>; touchscreen-size-y = <800>; status = "okay"; diff --git a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts index c8e55642f9c6e..3e834fc7e3707 100644 --- a/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts +++ b/arch/arm/boot/dts/ti/omap/am5729-beagleboneai.dts @@ -415,7 +415,6 @@ reg = <0x41>; interrupts = <30 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpio2>; - interrupt-controller; id = <0>; blocks = <0x5>; irq-trigger = <0x1>; -- GitLab From 91adecf911e5df78ea3e8f866e69db2c33416a5c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:27 -0600 Subject: [PATCH 1277/1405] arm64: dts: Fix dtc interrupt_provider warnings The dtc interrupt_provider warning is off by default. Fix all the warnings so it can be enabled. Signed-off-by: Rob Herring Reviewed-By: AngeloGioacchino Del Regno # Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Acked-by: Florian Fainelli #Broadcom Acked-by: Chanho Min Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-3-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/amazon/alpine-v2.dtsi | 1 - arch/arm64/boot/dts/amazon/alpine-v3.dtsi | 1 - arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 1 + arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 1 + arch/arm64/boot/dts/lg/lg1312.dtsi | 1 - arch/arm64/boot/dts/lg/lg1313.dtsi | 1 - arch/arm64/boot/dts/marvell/armada-ap80x.dtsi | 1 - arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 1 + arch/arm64/boot/dts/renesas/ulcb-kf.dtsi | 4 ++++ 9 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi index dccbba6e7f98e..dbf2dce8d1d68 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v2.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v2.dtsi @@ -145,7 +145,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <160>; al,msi-num-spis = <160>; diff --git a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi index 39481d7fd7d4d..3ea178acdddfe 100644 --- a/arch/arm64/boot/dts/amazon/alpine-v3.dtsi +++ b/arch/arm64/boot/dts/amazon/alpine-v3.dtsi @@ -355,7 +355,6 @@ msix: msix@fbe00000 { compatible = "al,alpine-msix"; reg = <0x0 0xfbe00000 0x0 0x100000>; - interrupt-controller; msi-controller; al,msi-base-spi = <336>; al,msi-num-spis = <959>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 9dcd25ec2c041..896d1f33b5b61 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -586,6 +586,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index f049687d6b96d..d8516ec0dae74 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -450,6 +450,7 @@ #gpio-cells = <2>; gpio-controller; interrupt-controller; + #interrupt-cells = <2>; interrupts = ; gpio-ranges = <&pinmux 0 0 16>, <&pinmux 16 71 2>, diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 48ec4ebec0a83..b864ffa74ea8b 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -126,7 +126,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 3869460aa5dcb..996fb39bb50c1 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -126,7 +126,6 @@ amba { #address-cells = <2>; #size-cells = <1>; - #interrupt-cells = <3>; compatible = "simple-bus"; interrupt-parent = <&gic>; diff --git a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi index 2c920e22cec2b..7ec7c789d87ef 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap80x.dtsi @@ -138,7 +138,6 @@ odmi: odmi@300000 { compatible = "marvell,odmi-controller"; - interrupt-controller; msi-controller; marvell,odmi-frames = <4>; reg = <0x300000 0x4000>, diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts index 69c7f3954ae59..4127cb84eba41 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts +++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts @@ -128,6 +128,7 @@ compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; + #interrupt-cells = <1>; interrupts-extended = <&pio 101 IRQ_TYPE_EDGE_FALLING>; interrupt-names = "IRQB"; diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 3885ef3454ff6..50de17e4fb3f2 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -234,6 +234,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <8 IRQ_TYPE_EDGE_FALLING>; @@ -294,6 +295,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio6>; interrupts = <4 IRQ_TYPE_EDGE_FALLING>; }; @@ -314,6 +316,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio7>; interrupts = <3 IRQ_TYPE_EDGE_FALLING>; }; @@ -324,6 +327,7 @@ gpio-controller; #gpio-cells = <2>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&gpio5>; interrupts = <9 IRQ_TYPE_EDGE_FALLING>; }; -- GitLab From f02b0f0dc26fbb77fe47b6e47cc5c211f0432c37 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:28 -0600 Subject: [PATCH 1278/1405] arm: dts: Fix dtc interrupt_map warnings The dtc interrupt_map warning is off because its dependency, interrupt_provider, is off by default. Fix all the warnings so it can be enabled. Signed-off-by: Rob Herring Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-4-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts | 2 ++ .../boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts | 2 ++ arch/arm/boot/dts/qcom/qcom-sdx55.dtsi | 8 ++++---- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts index 4d70f6afd13ab..6d5e69035f94d 100644 --- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts +++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-gateway-7001.dts @@ -60,6 +60,8 @@ * We have slots (IDSEL) 1 and 2 with one assigned IRQ * each handling all IRQs. */ + #interrupt-cells = <1>; + interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = /* IDSEL 1 */ <0x0800 0 0 1 &gpio0 11 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 1 is irq 11 */ diff --git a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts index 9ec0169bacf8c..5f4c849915db7 100644 --- a/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts +++ b/arch/arm/boot/dts/intel/ixp/intel-ixp42x-goramo-multilink.dts @@ -89,6 +89,8 @@ * The slots have Ethernet, Ethernet, NEC and MPCI. * The IDSELs are 11, 12, 13, 14. */ + #interrupt-cells = <1>; + interrupt-map-mask = <0xf800 0 0 7>; interrupt-map = /* IDSEL 11 - Ethernet A */ <0x5800 0 0 1 &gpio0 4 IRQ_TYPE_LEVEL_LOW>, /* INT A on slot 11 is irq 4 */ diff --git a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi index 2045fc779f887..27429d0fedfba 100644 --- a/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom/qcom-sdx55.dtsi @@ -340,10 +340,10 @@ "msi8"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 0 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 0 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 141 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 144 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_PCIE_PIPE_CLK>, <&gcc GCC_PCIE_AUX_CLK>, -- GitLab From 704dccec0d490f2ad06f3f16ebed254d81906c3a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Feb 2024 13:34:29 -0600 Subject: [PATCH 1279/1405] arm64: dts: qcom: Fix interrupt-map cell sizes The PCI node interrupt-map properties have the wrong size as #address-cells in the interrupt parent are not accounted for. The dtc interrupt_map check catches this, but the warning is off because its dependency, interrupt_provider, is off by default. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20240213-arm-dt-cleanups-v1-5-f2dee1292525@kernel.org Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 8 ++++---- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 5e1277fea7250..61c8fd49c9667 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -830,10 +830,10 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ + interrupt-map = <0 0 0 1 &intc 0 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ + <0 0 0 2 &intc 0 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ + <0 0 0 3 &intc 0 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ + <0 0 0 4 &intc 0 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>, <&gcc GCC_PCIE0_AXI_M_CLK>, diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index cf295bed32998..26441447c866f 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -814,13 +814,13 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 142 + interrupt-map = <0 0 0 1 &intc 0 0 142 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 143 + <0 0 0 2 &intc 0 0 143 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 144 + <0 0 0 3 &intc 0 0 144 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 145 + <0 0 0 4 &intc 0 0 145 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE1_AXI_CLK>, @@ -876,13 +876,13 @@ interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; - interrupt-map = <0 0 0 1 &intc 0 75 + interrupt-map = <0 0 0 1 &intc 0 0 75 IRQ_TYPE_LEVEL_HIGH>, /* int_a */ - <0 0 0 2 &intc 0 78 + <0 0 0 2 &intc 0 0 78 IRQ_TYPE_LEVEL_HIGH>, /* int_b */ - <0 0 0 3 &intc 0 79 + <0 0 0 3 &intc 0 0 79 IRQ_TYPE_LEVEL_HIGH>, /* int_c */ - <0 0 0 4 &intc 0 83 + <0 0 0 4 &intc 0 0 83 IRQ_TYPE_LEVEL_HIGH>; /* int_d */ clocks = <&gcc GCC_SYS_NOC_PCIE0_AXI_CLK>, -- GitLab From 0df8669f69a8638f04c6a3d1f3b7056c2c18f62c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 19 Feb 2024 09:05:38 -0700 Subject: [PATCH 1280/1405] docs: Instruct LaTeX to cope with deeper nesting The addition of the XFS online fsck documentation starting with commit a8f6c2e54ddc ("xfs: document the motivation for online fsck design") added a deeper level of nesting than LaTeX is prepared to deal with. That caused a pdfdocs build failure with the helpful "Too deeply nested" error message buried deeply in Documentation/output/filesystems.log. Increase the "maxlistdepth" parameter to instruct LaTeX that it needs to deal with the deeper nesting whether it wants to or not. Suggested-by: Akira Yokosawa Tested-by: Akira Yokosawa Cc: stable@vger.kernel.org # v6.4+ Link: https://lore.kernel.org/linux-doc/67f6ac60-7957-4b92-9d72-a08fbad0e028@gmail.com/ Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/conf.py b/Documentation/conf.py index 5830b01c56429..da64c9fb7e072 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -388,6 +388,12 @@ latex_elements = { verbatimhintsturnover=false, ''', + # + # Some of our authors are fond of deep nesting; tell latex to + # cope. + # + 'maxlistdepth': '10', + # For CJK One-half spacing, need to be in front of hyperref 'extrapackages': r'\usepackage{setspace}', -- GitLab From e3b63e966cac0bf78aaa1efede1827a252815a1d Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 25 Jan 2024 08:51:27 +0000 Subject: [PATCH 1281/1405] mm: zswap: fix missing folio cleanup in writeback race path In zswap_writeback_entry(), after we get a folio from __read_swap_cache_async(), we grab the tree lock again to check that the swap entry was not invalidated and recycled. If it was, we delete the folio we just added to the swap cache and exit. However, __read_swap_cache_async() returns the folio locked when it is newly allocated, which is always true for this path, and the folio is ref'd. Make sure to unlock and put the folio before returning. This was discovered by code inspection, probably because this path handles a race condition that should not happen often, and the bug would not crash the system, it will only strand the folio indefinitely. Link: https://lkml.kernel.org/r/20240125085127.1327013-1-yosryahmed@google.com Fixes: 04fc7816089c ("mm: fix zswap writeback race condition") Signed-off-by: Yosry Ahmed Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Reviewed-by: Nhat Pham Cc: Domenico Cerasuolo Cc: Signed-off-by: Andrew Morton --- mm/zswap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/zswap.c b/mm/zswap.c index 350dd2fc81599..d2423247acfd6 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1440,6 +1440,8 @@ static int zswap_writeback_entry(struct zswap_entry *entry, if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) { spin_unlock(&tree->lock); delete_from_swap_cache(folio); + folio_unlock(folio); + folio_put(folio); return -ENOMEM; } spin_unlock(&tree->lock); -- GitLab From e9e3db69966d5e9e6f7e7d017b407c0025180fe5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 5 Feb 2024 12:13:06 -0800 Subject: [PATCH 1282/1405] mm/damon/core: check apply interval in damon_do_apply_schemes() kdamond_apply_schemes() checks apply intervals of schemes and avoid further applying any schemes if no scheme passed its apply interval. However, the following schemes applying function, damon_do_apply_schemes() iterates all schemes without the apply interval check. As a result, the shortest apply interval is applied to all schemes. Fix the problem by checking the apply interval in damon_do_apply_schemes(). Link: https://lkml.kernel.org/r/20240205201306.88562-1-sj@kernel.org Fixes: 42f994b71404 ("mm/damon/core: implement scheme-specific apply interval") Signed-off-by: SeongJae Park Cc: [6.7.x] Signed-off-by: Andrew Morton --- mm/damon/core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 36f6f1d21ff06..5b325749fc125 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1026,6 +1026,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c, damon_for_each_scheme(s, c) { struct damos_quota *quota = &s->quota; + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + if (!s->wmarks.activated) continue; @@ -1176,10 +1179,6 @@ static void kdamond_apply_schemes(struct damon_ctx *c) if (c->passed_sample_intervals != s->next_apply_sis) continue; - s->next_apply_sis += - (s->apply_interval_us ? s->apply_interval_us : - c->attrs.aggr_interval) / sample_interval; - if (!s->wmarks.activated) continue; @@ -1195,6 +1194,14 @@ static void kdamond_apply_schemes(struct damon_ctx *c) damon_for_each_region_safe(r, next_r, t) damon_do_apply_schemes(c, t, r); } + + damon_for_each_scheme(s, c) { + if (c->passed_sample_intervals != s->next_apply_sis) + continue; + s->next_apply_sis += + (s->apply_interval_us ? s->apply_interval_us : + c->attrs.aggr_interval) / sample_interval; + } } /* -- GitLab From 7efa6f2c803366f84c3c362f01e822490669d72b Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Mon, 5 Feb 2024 14:50:56 +0000 Subject: [PATCH 1283/1405] selftests/mm: uffd-unit-test check if huge page size is 0 If HUGETLBFS is not enabled then the default_huge_page_size function will return 0 and cause a divide by 0 error. Add a check to see if the huge page size is 0 and skip the hugetlb tests if it is. Link: https://lkml.kernel.org/r/20240205145055.3545806-2-terry.tritton@linaro.org Fixes: 16a45b57cbf2 ("selftests/mm: add framework for uffd-unit-test") Signed-off-by: Terry Tritton Cc: Peter Griffin Cc: Shuah Khan Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/uffd-unit-tests.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index cce90a10515ad..2b9f8cc52639d 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -1517,6 +1517,12 @@ int main(int argc, char *argv[]) continue; uffd_test_start("%s on %s", test->name, mem_type->name); + if ((mem_type->mem_flag == MEM_HUGETLB || + mem_type->mem_flag == MEM_HUGETLB_PRIVATE) && + (default_huge_page_size() == 0)) { + uffd_test_skip("huge page size is 0, feature missing?"); + continue; + } if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; -- GitLab From 16e96ba5e92ce06b54f0862d44bb27bfa00d6c23 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Mon, 5 Feb 2024 15:24:42 -0800 Subject: [PATCH 1284/1405] mm/swap_state: update zswap LRU's protection range with the folio locked When a folio is swapped in, the protection size of the corresponding zswap LRU is incremented, so that the zswap shrinker is more conservative with its reclaiming action. This field is embedded within the struct lruvec, so updating it requires looking up the folio's memcg and lruvec. However, currently this lookup can happen after the folio is unlocked, for instance if a new folio is allocated, and swap_read_folio() unlocks the folio before returning. In this scenario, there is no stability guarantee for the binding between a folio and its memcg and lruvec: * A folio's memcg and lruvec can be freed between the lookup and the update, leading to a UAF. * Folio migration can clear the now-unlocked folio's memcg_data, which directs the zswap LRU protection size update towards the root memcg instead of the original memcg. This was recently picked up by the syzbot thanks to a warning in the inlined folio_lruvec() call. Move the zswap LRU protection range update above the swap_read_folio() call, and only when a new page is allocated, to prevent this. [nphamcs@gmail.com: add VM_WARN_ON_ONCE() to zswap_folio_swapin()] Link: https://lkml.kernel.org/r/20240206180855.3987204-1-nphamcs@gmail.com [nphamcs@gmail.com: remove unneeded if (folio) checks] Link: https://lkml.kernel.org/r/20240206191355.83755-1-nphamcs@gmail.com Link: https://lkml.kernel.org/r/20240205232442.3240571-1-nphamcs@gmail.com Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") Reported-by: syzbot+17a611d10af7d18a7092@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000ae47f90610803260@google.com/ Signed-off-by: Nhat Pham Reviewed-by: Chengming Zhou Acked-by: Johannes Weiner Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- mm/swap_state.c | 10 ++++++---- mm/zswap.c | 7 +++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index e671266ad7724..7255c01a1e4e1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -680,9 +680,10 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, /* The page was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); - if (unlikely(page_allocated)) + if (unlikely(page_allocated)) { + zswap_folio_swapin(folio); swap_read_folio(folio, false, NULL); - zswap_folio_swapin(folio); + } return folio; } @@ -855,9 +856,10 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, /* The folio was likely read above, so no need for plugging here */ folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx, &page_allocated, false); - if (unlikely(page_allocated)) + if (unlikely(page_allocated)) { + zswap_folio_swapin(folio); swap_read_folio(folio, false, NULL); - zswap_folio_swapin(folio); + } return folio; } diff --git a/mm/zswap.c b/mm/zswap.c index d2423247acfd6..36903d938c15e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -377,10 +377,9 @@ void zswap_folio_swapin(struct folio *folio) { struct lruvec *lruvec; - if (folio) { - lruvec = folio_lruvec(folio); - atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); - } + VM_WARN_ON_ONCE(!folio_test_locked(folio)); + lruvec = folio_lruvec(folio); + atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected); } /********************************* -- GitLab From 13ddaf26be324a7f951891ecd9ccd04466d27458 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: [PATCH 1285/1405] mm/swap: fix race when skipping swapcache When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swap.h | 5 +++++ mm/swapfile.c | 13 +++++++++++++ 4 files changed, 43 insertions(+) diff --git a/include/linux/swap.h b/include/linux/swap.h index 4db00ddad2616..8d28f6091a320 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -549,6 +549,11 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } +static inline int swapcache_prepare(swp_entry_t swp) +{ + return 0; +} + static inline void swap_free(swp_entry_t swp) { } diff --git a/mm/memory.c b/mm/memory.c index 15f8b10ea17c4..0bfc8b007c01a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3799,6 +3799,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct page *page; struct swap_info_struct *si = NULL; rmap_t rmap_flags = RMAP_NONE; + bool need_clear_cache = false; bool exclusive = false; swp_entry_t entry; pte_t pte; @@ -3867,6 +3868,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!folio) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); @@ -4117,6 +4132,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -4131,6 +4149,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) folio_unlock(swapcache); folio_put(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swap.h b/mm/swap.h index 758c46ca671ed..fc2f6ade7f80b 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -41,6 +41,7 @@ void __delete_from_swap_cache(struct folio *folio, void delete_from_swap_cache(struct folio *folio); void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); struct folio *filemap_get_incore_folio(struct address_space *mapping, @@ -97,6 +98,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct folio *swap_cache_get_folio(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr) { diff --git a/mm/swapfile.c b/mm/swapfile.c index 556ff7347d5f0..746aa9da53025 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3365,6 +3365,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); -- GitLab From 1eb1e984379e2da04361763f66eec90dd75cf63e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 8 Feb 2024 07:30:10 -0800 Subject: [PATCH 1286/1405] lib/Kconfig.debug: TEST_IOV_ITER depends on MMU Trying to run the iov_iter unit test on a nommu system such as the qemu kc705-nommu emulation results in a crash. KTAP version 1 # Subtest: iov_iter # module: kunit_iov_iter 1..9 BUG: failure at mm/nommu.c:318/vmap()! Kernel panic - not syncing: BUG! The test calls vmap() directly, but vmap() is not supported on nommu systems, causing the crash. TEST_IOV_ITER therefore needs to depend on MMU. Link: https://lkml.kernel.org/r/20240208153010.1439753-1-linux@roeck-us.net Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator") Signed-off-by: Guenter Roeck Cc: David Howells Cc: Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 975a07f9f1cc0..ef36b829ae1f5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2235,6 +2235,7 @@ config TEST_DIV64 config TEST_IOV_ITER tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS depends on KUNIT + depends on MMU default KUNIT_ALL_TESTS help Enable this to turn on testing of the operation of the I/O iterator -- GitLab From 678e54d4bb9a4822f8ae99690ac131c5d490cdb1 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Thu, 8 Feb 2024 02:32:54 +0000 Subject: [PATCH 1287/1405] mm/zswap: invalidate duplicate entry when !zswap_enabled We have to invalidate any duplicate entry even when !zswap_enabled since zswap can be disabled anytime. If the folio store success before, then got dirtied again but zswap disabled, we won't invalidate the old duplicate entry in the zswap_store(). So later lru writeback may overwrite the new data in swapfile. Link: https://lkml.kernel.org/r/20240208023254.3873823-1-chengming.zhou@linux.dev Fixes: 42c06a0e8ebe ("mm: kill frontswap") Signed-off-by: Chengming Zhou Acked-by: Johannes Weiner Cc: Nhat Pham Cc: Yosry Ahmed Cc: Signed-off-by: Andrew Morton --- mm/zswap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/zswap.c b/mm/zswap.c index 36903d938c15e..db4625af65fb7 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1518,7 +1518,7 @@ bool zswap_store(struct folio *folio) if (folio_test_large(folio)) return false; - if (!zswap_enabled || !tree) + if (!tree) return false; /* @@ -1533,6 +1533,10 @@ bool zswap_store(struct folio *folio) zswap_invalidate_entry(tree, dupentry); } spin_unlock(&tree->lock); + + if (!zswap_enabled) + return false; + objcg = get_obj_cgroup_from_folio(folio); if (objcg && !obj_cgroup_may_zswap(objcg)) { memcg = get_mem_cgroup_from_objcg(objcg); -- GitLab From 4f155af0ae4464134bfcfd9f043b6b727c84e947 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 9 Feb 2024 08:39:12 +0530 Subject: [PATCH 1288/1405] mm/memblock: add MEMBLOCK_RSRV_NOINIT into flagname[] array The commit 77e6c43e137c ("memblock: introduce MEMBLOCK_RSRV_NOINIT flag") skipped adding this newly introduced memblock flag into flagname[] array, thus preventing a correct memblock flags output for applicable memblock regions. Link: https://lkml.kernel.org/r/20240209030912.1382251-1-anshuman.khandual@arm.com Fixes: 77e6c43e137c ("memblock: introduce MEMBLOCK_RSRV_NOINIT flag") Signed-off-by: Anshuman Khandual Reviewed-by: Mike Rapoport Cc: Signed-off-by: Andrew Morton --- mm/memblock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memblock.c b/mm/memblock.c index 4dcb2ee35eca8..d9f4b82cbffeb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -2249,6 +2249,7 @@ static const char * const flagname[] = { [ilog2(MEMBLOCK_MIRROR)] = "MIRROR", [ilog2(MEMBLOCK_NOMAP)] = "NOMAP", [ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG", + [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT", }; static int memblock_debug_show(struct seq_file *m, void *private) -- GitLab From 118642d7f606fc9b9c92ee611275420320290ffb Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 13 Feb 2024 03:16:34 -0500 Subject: [PATCH 1289/1405] mm: memcontrol: clarify swapaccount=0 deprecation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The swapaccount deprecation warning is throwing false positives. Since we deprecated the knob and defaulted to enabling, the only reports we've been getting are from folks that set swapaccount=1. While this is a nice affirmation that always-enabling was the right choice, we certainly don't want to warn when users request the supported mode. Only warn when disabling is requested, and clarify the warning. [colin.i.king@gmail.com: spelling: "commdandline" -> "commandline"] Link: https://lkml.kernel.org/r/20240215090544.1649201-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20240213081634.3652326-1-hannes@cmpxchg.org Fixes: b25806dcd3d5 ("mm: memcontrol: deprecate swapaccounting=0 mode") Signed-off-by: Colin Ian King Reported-by: "Jonas Schäfer" Reported-by: Narcis Garcia Suggested-by: Yosry Ahmed Signed-off-by: Johannes Weiner Reviewed-by: Yosry Ahmed Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Roman Gushchin Cc: Signed-off-by: Andrew Morton --- mm/memcontrol.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1ed40f9d3a277..61932c9215e77 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7971,9 +7971,13 @@ bool mem_cgroup_swap_full(struct folio *folio) static int __init setup_swap_account(char *s) { - pr_warn_once("The swapaccount= commandline option is deprecated. " - "Please report your usecase to linux-mm@kvack.org if you " - "depend on this functionality.\n"); + bool res; + + if (!kstrtobool(s, &res) && !res) + pr_warn_once("The swapaccount=0 commandline option is deprecated " + "in favor of configuring swap control via cgroupfs. " + "Please report your usecase to linux-mm@kvack.org if you " + "depend on this functionality.\n"); return 1; } __setup("swapaccount=", setup_swap_account); -- GitLab From 0721a614ef798053a4a54c74e2501b8d15b0eff3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 12 Feb 2024 18:36:32 -0800 Subject: [PATCH 1290/1405] mm/damon/sysfs-schemes: handle schemes sysfs dir removal before commit_schemes_quota_goals 'commit_schemes_quota_goals' command handler, damos_sysfs_set_quota_scores() assumes the number of schemes sysfs directory will be same to the number of schemes of the DAMON context. The assumption is wrong since users can remove schemes sysfs directories while DAMON is running. In the case, illegal memory accesses can happen. Fix it by checking the case. Link: https://lkml.kernel.org/r/20240213023633.124928-1-sj@kernel.org Fixes: d91beaa505a0 ("mm/damon/sysfs-schemes: implement a command for scheme quota goals only commit") Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- mm/damon/sysfs-schemes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c index dd2fb51270092..ae0f0b314f3a9 100644 --- a/mm/damon/sysfs-schemes.c +++ b/mm/damon/sysfs-schemes.c @@ -1905,6 +1905,10 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes, damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_scheme *sysfs_scheme; + /* user could have removed the scheme sysfs dir */ + if (i >= sysfs_schemes->nr) + break; + sysfs_scheme = sysfs_schemes->schemes_arr[i]; damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals, &scheme->quota); -- GitLab From 379c5aaa14351aa9faf1f569fe9e3c4f4b02f6a0 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Mon, 19 Feb 2024 12:50:50 -0800 Subject: [PATCH 1291/1405] MAINTAINERS: mailmap: update Shakeel's email address Moving to linux.dev based email for kernel work. Link: https://lkml.kernel.org/r/20240219205050.887810-1-shakeel.butt@linux.dev Signed-off-by: Shakeel Butt Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index b99a238ee3bde..08f28f2999f0d 100644 --- a/.mailmap +++ b/.mailmap @@ -553,6 +553,7 @@ Senthilkumar N L Serge Hallyn Serge Hallyn Seth Forshee +Shakeel Butt Shannon Nelson Shannon Nelson Shannon Nelson diff --git a/MAINTAINERS b/MAINTAINERS index 9ed4d38685394..c3c9cf33595cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5378,7 +5378,7 @@ CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner M: Michal Hocko M: Roman Gushchin -M: Shakeel Butt +M: Shakeel Butt R: Muchun Song L: cgroups@vger.kernel.org L: linux-mm@kvack.org -- GitLab From 1b0ca4e4ff10a2c8402e2cf70132c683e1c772e4 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:24 -0800 Subject: [PATCH 1292/1405] mm/damon/reclaim: fix quota stauts loss due to online tunings Patch series "mm/damon: fix quota status loss due to online tunings". DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status when applying new user parameters, and hence could cause temporal quota accuracy degradation. Fix it by preserving the status. This patch (of 2): For online parameters change, DAMON_RECLAIM creates new scheme based on latest values of the parameters and replaces the old scheme with the new one. When creating it, the internal status of the quota of the old scheme is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240216194025.9207-2-sj@kernel.org Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton --- mm/damon/reclaim.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index ab974e477d2f2..66e190f0374ac 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -150,9 +150,20 @@ static struct damos *damon_reclaim_new_scheme(void) &damon_reclaim_wmarks); } +static void damon_reclaim_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_reclaim_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *old_scheme; struct damos_filter *filter; int err = 0; @@ -164,6 +175,11 @@ static int damon_reclaim_apply_parameters(void) scheme = damon_reclaim_new_scheme(); if (!scheme) return -ENOMEM; + if (!list_empty(&ctx->schemes)) { + damon_for_each_scheme(old_scheme, ctx) + damon_reclaim_copy_quota_status(&scheme->quota, + &old_scheme->quota); + } if (skip_anon) { filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true); if (!filter) { -- GitLab From 13d0599ab3b2ff17f798353f24bcbef1659d3cfc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 16 Feb 2024 11:40:25 -0800 Subject: [PATCH 1293/1405] mm/damon/lru_sort: fix quota status loss due to online tunings For online parameters change, DAMON_LRU_SORT creates new schemes based on latest values of the parameters and replaces the old schemes with the new one. When creating it, the internal status of the quotas of the old schemes is not preserved. As a result, charging of the quota starts from zero after the online tuning. The data that collected to estimate the throughput of the scheme's action is also reset, and therefore the estimation should start from the scratch again. Because the throughput estimation is being used to convert the time quota to the effective size quota, this could result in temporal time quota inaccuracy. It would be recovered over time, though. In short, the quota accuracy could be temporarily degraded after online parameters update. Fix the problem by checking the case and copying the internal fields for the status. Link: https://lkml.kernel.org/r/20240216194025.9207-3-sj@kernel.org Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") Signed-off-by: SeongJae Park Cc: [6.0+] Signed-off-by: Andrew Morton --- mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index f2e5f9431892e..3de2916a65c38 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -185,9 +185,21 @@ static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) return damon_lru_sort_new_scheme(&pattern, DAMOS_LRU_DEPRIO); } +static void damon_lru_sort_copy_quota_status(struct damos_quota *dst, + struct damos_quota *src) +{ + dst->total_charged_sz = src->total_charged_sz; + dst->total_charged_ns = src->total_charged_ns; + dst->charged_sz = src->charged_sz; + dst->charged_from = src->charged_from; + dst->charge_target_from = src->charge_target_from; + dst->charge_addr_from = src->charge_addr_from; +} + static int damon_lru_sort_apply_parameters(void) { - struct damos *scheme; + struct damos *scheme, *hot_scheme, *cold_scheme; + struct damos *old_hot_scheme = NULL, *old_cold_scheme = NULL; unsigned int hot_thres, cold_thres; int err = 0; @@ -195,18 +207,35 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + damon_for_each_scheme(scheme, ctx) { + if (!old_hot_scheme) { + old_hot_scheme = scheme; + continue; + } + old_cold_scheme = scheme; + } + hot_thres = damon_max_nr_accesses(&damon_lru_sort_mon_attrs) * hot_thres_access_freq / 1000; - scheme = damon_lru_sort_new_hot_scheme(hot_thres); - if (!scheme) + hot_scheme = damon_lru_sort_new_hot_scheme(hot_thres); + if (!hot_scheme) return -ENOMEM; - damon_set_schemes(ctx, &scheme, 1); + if (old_hot_scheme) + damon_lru_sort_copy_quota_status(&hot_scheme->quota, + &old_hot_scheme->quota); cold_thres = cold_min_age / damon_lru_sort_mon_attrs.aggr_interval; - scheme = damon_lru_sort_new_cold_scheme(cold_thres); - if (!scheme) + cold_scheme = damon_lru_sort_new_cold_scheme(cold_thres); + if (!cold_scheme) { + damon_destroy_scheme(hot_scheme); return -ENOMEM; - damon_add_scheme(ctx, scheme); + } + if (old_cold_scheme) + damon_lru_sort_copy_quota_status(&cold_scheme->quota, + &old_cold_scheme->quota); + + damon_set_schemes(ctx, &hot_scheme, 1); + damon_add_scheme(ctx, cold_scheme); return damon_set_region_biggest_system_ram_default(target, &monitor_region_start, -- GitLab From 2597c9947b0174fcc71bdd7ab6cb49c2b4291e95 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 13 Feb 2024 14:39:58 +1100 Subject: [PATCH 1294/1405] kasan: guard release_free_meta() shadow access with kasan_arch_is_ready() release_free_meta() accesses the shadow directly through the path kasan_slab_free __kasan_slab_free kasan_release_object_meta release_free_meta kasan_mem_to_shadow There are no kasan_arch_is_ready() guards here, allowing an oops when the shadow is not initialized. The oops can be seen on a Power8 KVM guest. This patch adds the guard to release_free_meta(), as it's the first level that specifically requires the shadow. It is safe to put the guard at the start of this function, before the stack put: only kasan_save_free_info() can initialize the saved stack, which itself is guarded with kasan_arch_is_ready() by its caller poison_slab_object(). If the arch becomes ready before release_free_meta() then we will not observe KASAN_SLAB_FREE_META in the object's shadow, so we will not put an uninitialized stack either. Link: https://lkml.kernel.org/r/20240213033958.139383-1-bgray@linux.ibm.com Fixes: 63b85ac56a64 ("kasan: stop leaking stack trace handles") Signed-off-by: Benjamin Gray Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Michael Ellerman Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- mm/kasan/generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index df6627f62402c..032bf3e98c240 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -522,6 +522,9 @@ static void release_alloc_meta(struct kasan_alloc_meta *meta) static void release_free_meta(const void *object, struct kasan_free_meta *meta) { + if (!kasan_arch_is_ready()) + return; + /* Check if free meta is valid. */ if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_SLAB_FREE_META) return; -- GitLab From f3e6b3ae9cfc128af11b665c6ef4022ba2683778 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 18 Feb 2024 14:28:57 -0800 Subject: [PATCH 1295/1405] acpi/ghes: Remove CXL CPER notifications Initial tests with the CXL CPER implementation identified that error reports were being duplicated in the log and the trace event [1]. Then it was discovered that the notification handler took sleeping locks while the GHES event handling runs in spin_lock_irqsave() context [2] While the duplicate reporting was fixed in v6.8-rc4, the fix for the sleeping-lock-vs-atomic collision would enjoy more time to settle and gain some test cycles. Given how late it is in the development cycle, remove the CXL hookup for now and try again during the next merge window. Note that end result is that v6.8 does not emit CXL CPER payloads to the kernel log, but this is in line with the CXL trend to move error reporting to trace events instead of the kernel log. Cc: Ard Biesheuvel Cc: Rafael J. Wysocki Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: http://lore.kernel.org/r/20240108165855.00002f5a@Huawei.com [1] Closes: http://lore.kernel.org/r/b963c490-2c13-4b79-bbe7-34c6568423c7@moroto.mountain [2] Signed-off-by: Dan Williams --- drivers/acpi/apei/ghes.c | 63 --------------------------------------- drivers/cxl/pci.c | 57 +---------------------------------- include/linux/cxl-event.h | 18 ----------- 3 files changed, 1 insertion(+), 137 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fe825a432c5bf..ab2a82cb1b0b4 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, schedule_work(&entry->work); } -/* - * Only a single callback can be registered for CXL CPER events. - */ -static DECLARE_RWSEM(cxl_cper_rw_sem); -static cxl_cper_callback cper_callback; - -static void cxl_cper_post_event(enum cxl_event_type event_type, - struct cxl_cper_event_rec *rec) -{ - if (rec->hdr.length <= sizeof(rec->hdr) || - rec->hdr.length > sizeof(*rec)) { - pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n", - rec->hdr.length); - return; - } - - if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) { - pr_err(FW_WARN "CXL CPER invalid event\n"); - return; - } - - guard(rwsem_read)(&cxl_cper_rw_sem); - if (cper_callback) - cper_callback(event_type, rec); -} - -int cxl_cper_register_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (cper_callback) - return -EINVAL; - cper_callback = callback; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL); - -int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - guard(rwsem_write)(&cxl_cper_rw_sem); - if (callback != cper_callback) - return -EINVAL; - cper_callback = NULL; - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL); - static bool ghes_do_proc(struct ghes *ghes, const struct acpi_hest_generic_status *estatus) { @@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes, } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { queued = ghes_handle_arm_hw_error(gdata, sev, sync); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec); - } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec); - } else if (guid_equal(sec_type, - &CPER_SEC_CXL_MEM_MODULE_GUID)) { - struct cxl_cper_event_rec *rec = - acpi_hest_get_payload(gdata); - - cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec); } else { void *err = acpi_hest_get_payload(gdata); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 233e7c42c161d..2ff361e756d66 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -974,61 +974,6 @@ static struct pci_driver cxl_pci_driver = { }, }; -#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) -static void cxl_cper_event_call(enum cxl_event_type ev_type, - struct cxl_cper_event_rec *rec) -{ - struct cper_cxl_event_devid *device_id = &rec->hdr.device_id; - struct pci_dev *pdev __free(pci_dev_put) = NULL; - enum cxl_event_log_type log_type; - struct cxl_dev_state *cxlds; - unsigned int devfn; - u32 hdr_flags; - - devfn = PCI_DEVFN(device_id->device_num, device_id->func_num); - pdev = pci_get_domain_bus_and_slot(device_id->segment_num, - device_id->bus_num, devfn); - if (!pdev) - return; - - guard(pci_dev)(pdev); - if (pdev->driver != &cxl_pci_driver) - return; - - cxlds = pci_get_drvdata(pdev); - if (!cxlds) - return; - - /* Fabricate a log type */ - hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags); - log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags); - - cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type, - &uuid_null, &rec->event); -} - -static int __init cxl_pci_driver_init(void) -{ - int rc; - - rc = cxl_cper_register_callback(cxl_cper_event_call); - if (rc) - return rc; - - rc = pci_register_driver(&cxl_pci_driver); - if (rc) - cxl_cper_unregister_callback(cxl_cper_event_call); - - return rc; -} - -static void __exit cxl_pci_driver_exit(void) -{ - pci_unregister_driver(&cxl_pci_driver); - cxl_cper_unregister_callback(cxl_cper_event_call); -} - -module_init(cxl_pci_driver_init); -module_exit(cxl_pci_driver_exit); +module_pci_driver(cxl_pci_driver); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 91125eca4c8ab..03fa6d50d46fe 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -140,22 +140,4 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; -typedef void (*cxl_cper_callback)(enum cxl_event_type type, - struct cxl_cper_event_rec *rec); - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_callback(cxl_cper_callback callback); -int cxl_cper_unregister_callback(cxl_cper_callback callback); -#else -static inline int cxl_cper_register_callback(cxl_cper_callback callback) -{ - return 0; -} - -static inline int cxl_cper_unregister_callback(cxl_cper_callback callback) -{ - return 0; -} -#endif - #endif /* _LINUX_CXL_EVENT_H */ -- GitLab From 5c6224bfabbf7f3e491c51ab50fd2c6f92ba1141 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Feb 2024 19:11:34 -0800 Subject: [PATCH 1296/1405] cxl/acpi: Fix load failures due to single window creation failure The expectation is that cxl_parse_cfwms() continues in the face the of failure as evidenced by code like: cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) return 0; There are other error paths in that function which mistakenly follow idiomatic expectations and return an error when they should not. Most of those mistakes are innocuous checks that hardly ever fail in practice. However, a recent change succeed in making the implementation more fragile by applying an idiomatic, but still wrong "fix" [1]. In this failure case the kernel reports: cxl root0: Failed to populate active decoder targets cxl_acpi ACPI0017:00: Failed to add decode range: [mem 0x00000000-0x7fffffff flags 0x200] ...which is a real issue with that one window (to be fixed separately), but ends up failing the entirety of cxl_acpi_probe(). Undo that recent breakage while also removing the confusion about ignoring errors. Update all exits paths to return an error per typical expectations and let an outer wrapper function handle dropping the error. Fixes: 91019b5bc7c2 ("cxl/acpi: Return 'rc' instead of '0' in cxl_parse_cfmws()") [1] Cc: Cc: Breno Leitao Cc: Alison Schofield Cc: Vishal Verma Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index dcf2b39e10488..1a3e6aafbdcc3 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -316,31 +316,27 @@ static const struct cxl_root_ops acpi_root_ops = { .qos_class = cxl_acpi_qos_class, }; -static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, - const unsigned long end) +static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, + struct cxl_cfmws_context *ctx) { int target_map[CXL_DECODER_MAX_INTERLEAVE]; - struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; struct resource *cxl_res = ctx->cxl_res; struct cxl_cxims_context cxims_ctx; struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; - struct acpi_cedt_cfmws *cfmws; cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; struct resource *res; int rc; - cfmws = (struct acpi_cedt_cfmws *) header; - rc = cxl_acpi_cfmws_verify(dev, cfmws); if (rc) { dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); - return 0; + return rc; } rc = eiw_to_ways(cfmws->interleave_ways, &ways); @@ -376,7 +372,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) - return 0; + return PTR_ERR(cxlrd); cxld = &cxlrd->cxlsd.cxld; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); @@ -420,16 +416,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, put_device(&cxld->dev); else rc = cxl_decoder_autoremove(dev, cxld); - if (rc) { - dev_err(dev, "Failed to add decode range: %pr", res); - return rc; - } - dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n", - dev_name(&cxld->dev), - phys_to_target_node(cxld->hpa_range.start), - cxld->hpa_range.start, cxld->hpa_range.end); - - return 0; + return rc; err_insert: kfree(res->name); @@ -438,6 +425,29 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return -ENOMEM; } +static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cedt_cfmws *cfmws = (struct acpi_cedt_cfmws *)header; + struct cxl_cfmws_context *ctx = arg; + struct device *dev = ctx->dev; + int rc; + + rc = __cxl_parse_cfmws(cfmws, ctx); + if (rc) + dev_err(dev, + "Failed to add decode range: [%#llx - %#llx] (%d)\n", + cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1, rc); + else + dev_dbg(dev, "decode range: node: %d range [%#llx - %#llx]\n", + phys_to_target_node(cfmws->base_hpa), cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1); + + /* never fail cxl_acpi load for a single window failure */ + return 0; +} + __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { -- GitLab From fb1e881273f432e593f8789f99e725b09304cc97 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 20 Feb 2024 14:12:51 +0100 Subject: [PATCH 1297/1405] drm/i915/tv: Fix TV mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1fd4a5a36f9f ("drm/connector: Rename legacy TV property") failed to update all the users of the struct drm_tv_connector_state mode field, which resulted in a build failure in i915. However, a subsequent commit in the same series reintroduced a mode field in that structure, with a different semantic but the same type, with the assumption that all previous users were updated. Since that didn't happen, the i915 driver now compiles, but mixes accesses to the legacy_mode field and the newer mode field, but with the previous semantics. This obviously doesn't work very well, so we need to update the accesses that weren't in the legacy renaming commit. Fixes: 1fd4a5a36f9f ("drm/connector: Rename legacy TV property") Reported-by: Ville Syrjälä Signed-off-by: Maxime Ripard Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240220131251.453060-1-mripard@kernel.org (cherry picked from commit bf7626f19d6ff14b9722273e23700400cc4d78ba) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_sdvo.c | 10 +++++----- drivers/gpu/drm/i915/display/intel_tv.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index acc6b68041051..2915d7afe5ccc 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -1209,7 +1209,7 @@ static bool intel_sdvo_set_tv_format(struct intel_sdvo *intel_sdvo, struct intel_sdvo_tv_format format; u32 format_map; - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memset(&format, 0, sizeof(format)); memcpy(&format, &format_map, min(sizeof(format), sizeof(format_map))); @@ -2298,7 +2298,7 @@ static int intel_sdvo_get_tv_modes(struct drm_connector *connector) * Read the list of supported input resolutions for the selected TV * format. */ - format_map = 1 << conn_state->tv.mode; + format_map = 1 << conn_state->tv.legacy_mode; memcpy(&tv_res, &format_map, min(sizeof(format_map), sizeof(struct intel_sdvo_sdtv_resolution_request))); @@ -2363,7 +2363,7 @@ intel_sdvo_connector_atomic_get_property(struct drm_connector *connector, int i; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - if (state->tv.mode == intel_sdvo_connector->tv_format_supported[i]) { + if (state->tv.legacy_mode == intel_sdvo_connector->tv_format_supported[i]) { *val = i; return 0; @@ -2419,7 +2419,7 @@ intel_sdvo_connector_atomic_set_property(struct drm_connector *connector, struct intel_sdvo_connector_state *sdvo_state = to_intel_sdvo_connector_state(state); if (property == intel_sdvo_connector->tv_format) { - state->tv.mode = intel_sdvo_connector->tv_format_supported[val]; + state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[val]; if (state->crtc) { struct drm_crtc_state *crtc_state = @@ -3076,7 +3076,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, drm_property_add_enum(intel_sdvo_connector->tv_format, i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); - intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; + intel_sdvo_connector->base.base.state->tv.legacy_mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, intel_sdvo_connector->tv_format, 0); return true; diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c index d4386cb3569e0..992a725de751a 100644 --- a/drivers/gpu/drm/i915/display/intel_tv.c +++ b/drivers/gpu/drm/i915/display/intel_tv.c @@ -949,7 +949,7 @@ intel_disable_tv(struct intel_atomic_state *state, static const struct tv_mode *intel_tv_mode_find(const struct drm_connector_state *conn_state) { - int format = conn_state->tv.mode; + int format = conn_state->tv.legacy_mode; return &tv_modes[format]; } @@ -1704,7 +1704,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) break; } - connector->state->tv.mode = i; + connector->state->tv.legacy_mode = i; } static int @@ -1859,7 +1859,7 @@ static int intel_tv_atomic_check(struct drm_connector *connector, old_state = drm_atomic_get_old_connector_state(state, connector); new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc); - if (old_state->tv.mode != new_state->tv.mode || + if (old_state->tv.legacy_mode != new_state->tv.legacy_mode || old_state->tv.margins.left != new_state->tv.margins.left || old_state->tv.margins.right != new_state->tv.margins.right || old_state->tv.margins.top != new_state->tv.margins.top || @@ -1896,7 +1896,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) conn_state->tv.margins.right = 46; conn_state->tv.margins.bottom = 37; - conn_state->tv.mode = 0; + conn_state->tv.legacy_mode = 0; /* Create TV properties then attach current values */ for (i = 0; i < ARRAY_SIZE(tv_modes); i++) { @@ -1910,7 +1910,7 @@ static void intel_tv_add_properties(struct drm_connector *connector) drm_object_attach_property(&connector->base, i915->drm.mode_config.legacy_tv_mode_property, - conn_state->tv.mode); + conn_state->tv.legacy_mode); drm_object_attach_property(&connector->base, i915->drm.mode_config.tv_left_margin_property, conn_state->tv.margins.left); -- GitLab From 77aebae1ea12de6eae5ce70d05b3d4724eec4023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 9 Feb 2024 12:34:44 +0100 Subject: [PATCH 1298/1405] drm/xe/uapi: Remove support for persistent exec_queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persistent exec_queues delays explicit destruction of exec_queues until they are done executing, but destruction on process exit is still immediate. It turns out no UMD is relying on this functionality, so remove it. If there turns out to be a use-case in the future, let's re-add. Persistent exec_queues were never used for LR VMs v2: - Don't add an "UNUSED" define for the missing property (Lucas, Rodrigo) v3: - Remove the remaining struct xe_exec_queue::persistent state (Niranjana, Lucas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi Cc: Matthew Brost Cc: David Airlie Cc: Daniel Vetter Cc: Lucas De Marchi Cc: Francois Dugast Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240209113444.8396-1-thomas.hellstrom@linux.intel.com (cherry picked from commit f1a9abc0cf311375695bede1590364864c05976d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_device.c | 39 ------------------------ drivers/gpu/drm/xe/xe_device.h | 4 --- drivers/gpu/drm/xe/xe_device_types.h | 8 ----- drivers/gpu/drm/xe/xe_exec_queue.c | 33 +++----------------- drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 ------ drivers/gpu/drm/xe/xe_execlist.c | 2 -- drivers/gpu/drm/xe/xe_guc_submit.c | 2 -- include/uapi/drm/xe_drm.h | 1 - 8 files changed, 5 insertions(+), 94 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1f0b4b9ce84f5..5176c27e4b6a4 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -83,9 +83,6 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) return 0; } -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef); - static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -102,8 +99,6 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) mutex_unlock(&xef->exec_queue.lock); xa_destroy(&xef->exec_queue.xa); mutex_destroy(&xef->exec_queue.lock); - device_kill_persistent_exec_queues(xe, xef); - mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); @@ -255,9 +250,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, xa_erase(&xe->usm.asid_to_vm, asid); } - drmm_mutex_init(&xe->drm, &xe->persistent_engines.lock); - INIT_LIST_HEAD(&xe->persistent_engines.list); - spin_lock_init(&xe->pinned.lock); INIT_LIST_HEAD(&xe->pinned.kernel_bo_present); INIT_LIST_HEAD(&xe->pinned.external_vram); @@ -570,37 +562,6 @@ void xe_device_shutdown(struct xe_device *xe) { } -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - list_add_tail(&q->persistent.link, &xe->persistent_engines.list); - mutex_unlock(&xe->persistent_engines.lock); -} - -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q) -{ - mutex_lock(&xe->persistent_engines.lock); - if (!list_empty(&q->persistent.link)) - list_del(&q->persistent.link); - mutex_unlock(&xe->persistent_engines.lock); -} - -static void device_kill_persistent_exec_queues(struct xe_device *xe, - struct xe_file *xef) -{ - struct xe_exec_queue *q, *next; - - mutex_lock(&xe->persistent_engines.lock); - list_for_each_entry_safe(q, next, &xe->persistent_engines.list, - persistent.link) - if (q->persistent.xef == xef) { - xe_exec_queue_kill(q); - list_del_init(&q->persistent.link); - } - mutex_unlock(&xe->persistent_engines.lock); -} - void xe_device_wmb(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 3da83b2332063..08d8b72c77319 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -42,10 +42,6 @@ int xe_device_probe(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); -void xe_device_add_persistent_exec_queues(struct xe_device *xe, struct xe_exec_queue *q); -void xe_device_remove_persistent_exec_queues(struct xe_device *xe, - struct xe_exec_queue *q); - void xe_device_wmb(struct xe_device *xe); static inline struct xe_file *to_xe_file(const struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 5dc9127a20293..e8491979a6f21 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -341,14 +341,6 @@ struct xe_device { struct mutex lock; } usm; - /** @persistent_engines: engines that are closed but still running */ - struct { - /** @lock: protects persistent engines */ - struct mutex lock; - /** @list: list of persistent engines */ - struct list_head list; - } persistent_engines; - /** @pinned: pinned BO state */ struct { /** @lock: protected pinned BO list state */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 254b1d3af4cb5..3acfd4f07666f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -60,7 +60,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->fence_irq = >->fence_irq[hwe->class]; q->ring_ops = gt->ring_ops[hwe->class]; q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->persistent.link); INIT_LIST_HEAD(&q->compute.link); INIT_LIST_HEAD(&q->multi_gt_link); @@ -326,23 +325,6 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe, return q->ops->set_preempt_timeout(q, value); } -static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm))) - return -EINVAL; - - if (value) - q->flags |= EXEC_QUEUE_FLAG_PERSISTENT; - else - q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT; - - return 0; -} - static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create) { @@ -414,7 +396,6 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, @@ -441,6 +422,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); + if (!exec_queue_set_property_funcs[idx]) + return -EINVAL; + return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); } @@ -704,9 +688,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, - xe_vm_in_lr_mode(vm) ? 0 : - EXEC_QUEUE_FLAG_PERSISTENT); + args->width, hwe, 0); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) @@ -728,8 +710,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; } - q->persistent.xef = xef; - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->exec_queue.lock); @@ -872,10 +852,7 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; - if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT)) - xe_exec_queue_kill(q); - else - xe_device_add_persistent_exec_queues(xe, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 8d4b7feb8c306..947bbc4b285df 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -105,16 +105,6 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; - /** - * @persistent: persistent exec queue state - */ - struct { - /** @xef: file which this exec queue belongs to */ - struct xe_file *xef; - /** @link: link in list of persistent exec queues */ - struct list_head link; - } persistent; - union { /** * @parallel: parallel submission state diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 96b5224eb4787..42d01bbbf7d0a 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -378,8 +378,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w) list_del(&exl->active_link); spin_unlock_irqrestore(&exl->port->lock, flags); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(xe, q); drm_sched_entity_fini(&exl->entity); drm_sched_fini(&exl->sched); kfree(exl); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 54ffcfcdd41f9..f22ae717b0b2d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1028,8 +1028,6 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); - if (q->flags & EXEC_QUEUE_FLAG_PERSISTENT) - xe_device_remove_persistent_exec_queues(gt_to_xe(q->gt), q); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 9fa3ae324731a..6d11ee9e571ad 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1046,7 +1046,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -- GitLab From 85ce8e1d6d73e8d54cb244d10dd4021771231746 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:52 +0800 Subject: [PATCH 1299/1405] iommu/vt-d: Track nested domains in parent Today the parent domain (s2_domain) is unaware of which DID's are used by and which devices are attached to nested domains (s1_domain) nested on it. This leads to a problem that some operations (flush iotlb/devtlb and enable dirty tracking) on parent domain only apply to DID's and devices directly tracked in the parent domain hence are incomplete. This tracks the nested domains in list in parent domain. With this, operations on parent domain can loop the nested domains and refer to the devices and iommu_array to ensure the operations on parent domain take effect on all the affected devices and iommus. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-2-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 ++++++++++++++---- drivers/iommu/intel/iommu.h | 6 ++++++ drivers/iommu/intel/nested.c | 12 +++++++++++- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea11..e393c62776f30 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3883,6 +3883,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; struct intel_iommu *iommu = info->iommu; + struct dmar_domain *dmar_domain; struct iommu_domain *domain; /* Must be NESTING domain */ @@ -3908,11 +3909,16 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (!domain) return ERR_PTR(-ENOMEM); - if (nested_parent) - to_dmar_domain(domain)->nested_parent = true; + dmar_domain = to_dmar_domain(domain); + + if (nested_parent) { + dmar_domain->nested_parent = true; + INIT_LIST_HEAD(&dmar_domain->s1_domains); + spin_lock_init(&dmar_domain->s1_lock); + } if (dirty_tracking) { - if (to_dmar_domain(domain)->use_first_level) { + if (dmar_domain->use_first_level) { iommu_domain_free(domain); return ERR_PTR(-EOPNOTSUPP); } @@ -3924,8 +3930,12 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, static void intel_iommu_domain_free(struct iommu_domain *domain) { + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + WARN_ON(dmar_domain->nested_parent && + !list_empty(&dmar_domain->s1_domains)); if (domain != &si_domain->domain) - domain_exit(to_dmar_domain(domain)); + domain_exit(dmar_domain); } int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59a..9b27edb73aa9c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -627,6 +627,10 @@ struct dmar_domain { int agaw; /* maximum mapped address */ u64 max_addr; + /* Protect the s1_domains list */ + spinlock_t s1_lock; + /* Track s1_domains nested on this domain */ + struct list_head s1_domains; }; /* Nested user domain */ @@ -637,6 +641,8 @@ struct dmar_domain { unsigned long s1_pgtbl; /* page table attributes */ struct iommu_hwpt_vtd_s1 s1_cfg; + /* link to parent domain siblings */ + struct list_head s2_link; }; }; diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index f26c7f1c46cca..6a75f6eb18f1c 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -70,7 +70,13 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, static void intel_nested_domain_free(struct iommu_domain *domain) { - kfree(to_dmar_domain(domain)); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_domain *s2_domain = dmar_domain->s2_domain; + + spin_lock(&s2_domain->s1_lock); + list_del(&dmar_domain->s2_link); + spin_unlock(&s2_domain->s1_lock); + kfree(dmar_domain); } static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, @@ -201,5 +207,9 @@ struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); + spin_lock(&s2_domain->s1_lock); + list_add(&domain->s2_link, &s2_domain->s1_domains); + spin_unlock(&s2_domain->s1_lock); + return &domain->domain; } -- GitLab From 0455d317f533e4427ddaa805563ff48711f05810 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:53 +0800 Subject: [PATCH 1300/1405] iommu/vt-d: Add __iommu_flush_iotlb_psi() Add __iommu_flush_iotlb_psi() to do the psi iotlb flush with a DID input rather than calculating it within the helper. This is useful when flushing cache for parent domain which reuses DIDs of its nested domains. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-3-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 78 ++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e393c62776f30..dbdb8366c42ae 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1368,6 +1368,46 @@ static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, spin_unlock_irqrestore(&domain->lock, flags); } +static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, + unsigned long pfn, unsigned int pages, + int ih) +{ + unsigned int aligned_pages = __roundup_pow_of_two(pages); + unsigned long bitmask = aligned_pages - 1; + unsigned int mask = ilog2(aligned_pages); + u64 addr = (u64)pfn << VTD_PAGE_SHIFT; + + /* + * PSI masks the low order bits of the base address. If the + * address isn't aligned to the mask, then compute a mask value + * needed to ensure the target range is flushed. + */ + if (unlikely(bitmask & pfn)) { + unsigned long end_pfn = pfn + pages - 1, shared_bits; + + /* + * Since end_pfn <= pfn + bitmask, the only way bits + * higher than bitmask can differ in pfn and end_pfn is + * by carrying. This means after masking out bitmask, + * high bits starting with the first set bit in + * shared_bits are all equal in both pfn and end_pfn. + */ + shared_bits = ~(pfn ^ end_pfn) & ~bitmask; + mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + } + + /* + * Fallback to domain selective flush if no PSI support or + * the size is too big. + */ + if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) + iommu->flush.flush_iotlb(iommu, did, 0, 0, + DMA_TLB_DSI_FLUSH); + else + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, + DMA_TLB_PSI_FLUSH); +} + static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, struct dmar_domain *domain, unsigned long pfn, unsigned int pages, @@ -1384,42 +1424,10 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, if (ih) ih = 1 << 6; - if (domain->use_first_level) { + if (domain->use_first_level) domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); - } else { - unsigned long bitmask = aligned_pages - 1; - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; - - /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. - */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; - } - - /* - * Fallback to domain selective flush if no PSI support or - * the size is too big. - */ - if (!cap_pgsel_inv(iommu->cap) || - mask > cap_max_amask_val(iommu->cap)) - iommu->flush.flush_iotlb(iommu, did, 0, 0, - DMA_TLB_DSI_FLUSH); - else - iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, - DMA_TLB_PSI_FLUSH); - } + else + __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih); /* * In caching mode, changes of pages from non-present to present require -- GitLab From 821985301124c0a5e7ca15fc69a9a531e9442d70 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:54 +0800 Subject: [PATCH 1301/1405] iommu/vt-d: Add missing iotlb flush for parent domain If a domain is used as the parent in nested translation its mappings might be cached using DID of the nested domain. But the existing code ignores this fact to only invalidate the iotlb entries tagged by the domain's own DID. Loop the s1_domains list, if any, to invalidate all iotlb entries related to the target s2 address range. According to VT-d spec there is no need for software to explicitly flush the affected s1 cache. It's implicitly done by HW when s2 cache is invalidated. Fixes: b41e38e22539 ("iommu/vt-d: Add nested domain allocation") Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-4-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index dbdb8366c42ae..e3dbcae959754 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1451,6 +1451,28 @@ static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain * iommu_flush_write_buffer(iommu); } +/* + * Flush the relevant caches in nested translation if the domain + * also serves as a parent + */ +static void parent_domain_flush(struct dmar_domain *domain, + unsigned long pfn, + unsigned long pages, int ih) +{ + struct dmar_domain *s1_domain; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + struct iommu_domain_info *info; + unsigned long i; + + xa_for_each(&s1_domain->iommu_array, i, info) + __iommu_flush_iotlb_psi(info->iommu, info->did, + pfn, pages, ih); + } + spin_unlock(&domain->s1_lock); +} + static void intel_flush_iotlb_all(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); @@ -1470,6 +1492,9 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) if (!cap_caching_mode(iommu->cap)) iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH); } + + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, 0, -1, 0); } static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) @@ -1993,6 +2018,9 @@ static void switch_to_super_page(struct dmar_domain *domain, iommu_flush_iotlb_psi(info->iommu, domain, start_pfn, lvl_pages, 0, 0); + if (domain->nested_parent) + parent_domain_flush(domain, start_pfn, + lvl_pages, 0); } pte++; @@ -4125,6 +4153,9 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); + if (dmar_domain->nested_parent) + parent_domain_flush(dmar_domain, start_pfn, nrpages, + list_empty(&gather->freelist)); put_pages_list(&gather->freelist); } -- GitLab From 29e10487d6df050afeee886b7c1da208f389cb5b Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:55 +0800 Subject: [PATCH 1302/1405] iommu/vt-d: Update iotlb in nested domain attach Should call domain_update_iotlb() to update the has_iotlb_device flag of the domain after attaching device to nested domain. Without it, this flag is not set properly and would result in missing device TLB flush. Fixes: 9838f2bb6b6b ("iommu/vt-d: Set the nested domain to a device") Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-5-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +--- drivers/iommu/intel/iommu.h | 1 + drivers/iommu/intel/nested.c | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e3dbcae959754..711c3e3fe0954 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -396,8 +396,6 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } -static void domain_update_iotlb(struct dmar_domain *domain); - /* Return the super pagesize bitmap if supported. */ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain) { @@ -1218,7 +1216,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, return NULL; } -static void domain_update_iotlb(struct dmar_domain *domain) +void domain_update_iotlb(struct dmar_domain *domain) { struct dev_pasid_info *dev_pasid; struct device_domain_info *info; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 9b27edb73aa9c..4145c04cb1c68 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1066,6 +1066,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ #define QI_OPT_WAIT_DRAIN BIT(0) +void domain_update_iotlb(struct dmar_domain *domain); int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); void device_block_translation(struct device *dev); diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index 6a75f6eb18f1c..d5af5925a31c6 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -65,6 +65,8 @@ static int intel_nested_attach_dev(struct iommu_domain *domain, list_add(&info->link, &dmar_domain->devices); spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_update_iotlb(dmar_domain); + return 0; } -- GitLab From 5e54e861f16f37c84ae68d6d4bbd3de54b668317 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:56 +0800 Subject: [PATCH 1303/1405] iommu/vt-d: Add missing device iotlb flush for parent domain ATS-capable devices cache the result of nested translation. This result relies on the mappings in s2 domain (a.k.a. parent). When there are modifications in the s2 domain, the related nested translation caches on the device should be flushed. This includes the devices that are attached to the s1 domain. However, the existing code ignores this fact to only loops its own devices. As there is no easy way to identify the exact set of nested translations affected by the change of s2 domain. So, this just flushes the entire device iotlb on the device. As above, driver loops the s2 domain's s1_domains list and loops the devices list of each s1_domain to flush the entire device iotlb on the devices. Fixes: b41e38e22539 ("iommu/vt-d: Add nested domain allocation") Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-6-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 711c3e3fe0954..2eee83b5441bd 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1461,12 +1461,30 @@ static void parent_domain_flush(struct dmar_domain *domain, spin_lock(&domain->s1_lock); list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + struct device_domain_info *device_info; struct iommu_domain_info *info; + unsigned long flags; unsigned long i; xa_for_each(&s1_domain->iommu_array, i, info) __iommu_flush_iotlb_psi(info->iommu, info->did, pfn, pages, ih); + + if (!s1_domain->has_iotlb_device) + continue; + + spin_lock_irqsave(&s1_domain->lock, flags); + list_for_each_entry(device_info, &s1_domain->devices, link) + /* + * Address translation cache in device side caches the + * result of nested translation. There is no easy way + * to identify the exact set of nested translations + * affected by a change in S2. So just flush the entire + * device cache. + */ + __iommu_flush_dev_iotlb(device_info, 0, + MAX_AGAW_PFN_WIDTH); + spin_unlock_irqrestore(&s1_domain->lock, flags); } spin_unlock(&domain->s1_lock); } -- GitLab From 56ecaf6c5834ace14941d7f13dceb48bc3327111 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:57 +0800 Subject: [PATCH 1304/1405] iommu/vt-d: Remove domain parameter for intel_pasid_setup_dirty_tracking() The only usage of input @domain is to get the domain id (DID) to flush cache after setting dirty tracking. However, DID can be obtained from the pasid entry. So no need to pass in domain. This can make this helper cleaner when adding the missing dirty tracking for the parent domain, which needs to use the DID of nested domain. Signed-off-by: Yi Liu Reviewed-by: Joao Martins Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-7-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 7 +++---- drivers/iommu/intel/pasid.c | 3 +-- drivers/iommu/intel/pasid.h | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2eee83b5441bd..d286b85542271 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4741,8 +4741,7 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, goto out_unlock; list_for_each_entry(info, &dmar_domain->devices, link) { - ret = intel_pasid_setup_dirty_tracking(info->iommu, - info->domain, info->dev, + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, IOMMU_NO_PASID, enable); if (ret) goto err_unwind; @@ -4756,8 +4755,8 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, err_unwind: list_for_each_entry(info, &dmar_domain->devices, link) - intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain, - info->dev, IOMMU_NO_PASID, + intel_pasid_setup_dirty_tracking(info->iommu, info->dev, + IOMMU_NO_PASID, dmar_domain->dirty_tracking); spin_unlock(&dmar_domain->lock); return ret; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3239cefa4c337..a32d7e509842d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -428,7 +428,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Set up dirty tracking on a second only or nested translation type. */ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled) { @@ -445,7 +444,7 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, return -ENODEV; } - did = domain_id_iommu(domain, iommu); + did = pasid_get_domain_id(pte); pgtt = pasid_pte_get_pgtt(pte); if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) { diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 8d40d4c66e319..487ede039bdde 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -307,7 +307,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu, - struct dmar_domain *domain, struct device *dev, u32 pasid, bool enabled); int intel_pasid_setup_pass_through(struct intel_iommu *iommu, -- GitLab From 0c7f2497b39da44253d7bcf2b41f52b0048859ad Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:58 +0800 Subject: [PATCH 1305/1405] iommu/vt-d: Wrap the dirty tracking loop to be a helper Add device_set_dirty_tracking() to loop all the devices and set the dirty tracking per the @enable parameter. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Reviewed-by: Joao Martins Link: https://lore.kernel.org/r/20240208082307.15759-8-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d286b85542271..2ad8fbe6dc210 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4729,23 +4729,38 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) return vtd; } +/* + * Set dirty tracking for the device list of a domain. The caller must + * hold the domain->lock when calling it. + */ +static int device_set_dirty_tracking(struct list_head *devices, bool enable) +{ + struct device_domain_info *info; + int ret = 0; + + list_for_each_entry(info, devices, link) { + ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, + IOMMU_NO_PASID, enable); + if (ret) + break; + } + + return ret; +} + static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - struct device_domain_info *info; int ret; spin_lock(&dmar_domain->lock); if (dmar_domain->dirty_tracking == enable) goto out_unlock; - list_for_each_entry(info, &dmar_domain->devices, link) { - ret = intel_pasid_setup_dirty_tracking(info->iommu, info->dev, - IOMMU_NO_PASID, enable); - if (ret) - goto err_unwind; - } + ret = device_set_dirty_tracking(&dmar_domain->devices, enable); + if (ret) + goto err_unwind; dmar_domain->dirty_tracking = enable; out_unlock: @@ -4754,10 +4769,8 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, return 0; err_unwind: - list_for_each_entry(info, &dmar_domain->devices, link) - intel_pasid_setup_dirty_tracking(info->iommu, info->dev, - IOMMU_NO_PASID, - dmar_domain->dirty_tracking); + device_set_dirty_tracking(&dmar_domain->devices, + dmar_domain->dirty_tracking); spin_unlock(&dmar_domain->lock); return ret; } -- GitLab From f1e1610950eac0af5e40f6ee02315952f78192f7 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:15:59 +0800 Subject: [PATCH 1306/1405] iommu/vt-d: Add missing dirty tracking set for parent domain Setting dirty tracking for a s2 domain requires to loop all the related devices and set the dirty tracking enable bit in the PASID table entry. This includes the devices that are attached to the nested domains of a s2 domain if this s2 domain is used as parent. However, the existing dirty tracking set only loops s2 domain's own devices. It will miss dirty page logs in the parent domain. Now, the parent domain tracks the nested domains, so it can loop the nested domains and the devices attached to the nested domains to ensure dirty tracking on the parent is set completely. Fixes: b41e38e22539 ("iommu/vt-d: Add nested domain allocation") Signed-off-by: Yi Sun Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240208082307.15759-9-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2ad8fbe6dc210..11652e0bcab3a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4748,6 +4748,35 @@ static int device_set_dirty_tracking(struct list_head *devices, bool enable) return ret; } +static int parent_domain_set_dirty_tracking(struct dmar_domain *domain, + bool enable) +{ + struct dmar_domain *s1_domain; + unsigned long flags; + int ret; + + spin_lock(&domain->s1_lock); + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + ret = device_set_dirty_tracking(&s1_domain->devices, enable); + spin_unlock_irqrestore(&s1_domain->lock, flags); + if (ret) + goto err_unwind; + } + spin_unlock(&domain->s1_lock); + return 0; + +err_unwind: + list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) { + spin_lock_irqsave(&s1_domain->lock, flags); + device_set_dirty_tracking(&s1_domain->devices, + domain->dirty_tracking); + spin_unlock_irqrestore(&s1_domain->lock, flags); + } + spin_unlock(&domain->s1_lock); + return ret; +} + static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, bool enable) { @@ -4762,6 +4791,12 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain, if (ret) goto err_unwind; + if (dmar_domain->nested_parent) { + ret = parent_domain_set_dirty_tracking(dmar_domain, enable); + if (ret) + goto err_unwind; + } + dmar_domain->dirty_tracking = enable; out_unlock: spin_unlock(&dmar_domain->lock); -- GitLab From 1f0198fce68340e0da2d438f4ea9fc20d2c958da Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 19 Feb 2024 19:16:00 +0800 Subject: [PATCH 1307/1405] iommu/vt-d: Set SSADE when attaching to a parent with dirty tracking Should set the SSADE (Second Stage Access/Dirty bit Enable) bit of the pasid entry when attaching a device to a nested domain if its parent has already enabled dirty tracking. Fixes: 111bf85c68f6 ("iommu/vt-d: Add helper to setup pasid nested translation") Signed-off-by: Yi Liu Reviewed-by: Joao Martins Link: https://lore.kernel.org/r/20240208091414.28133-1-yi.l.liu@intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index a32d7e509842d..108158e2b907d 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -657,6 +657,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, s2_domain->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + if (s2_domain->dirty_tracking) + pasid_set_ssade(pte); pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED); pasid_set_present(pte); spin_unlock(&iommu->lock); -- GitLab From 4578f989ed6b77c14af86bb882cc5ff0a46a69eb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Feb 2024 19:16:01 +0800 Subject: [PATCH 1308/1405] iommu/vt-d: Fix constant-out-of-range warning On 32-bit builds, the vt-d driver causes a warning with clang: drivers/iommu/intel/nested.c:112:13: error: result of comparison of constant 18446744073709551615 with expression of type 'unsigned long' is always false [-Werror,-Wtautological-constant-out-of-range-compare] 112 | if (npages == U64_MAX) | ~~~~~~ ^ ~~~~~~~ Make the variable a 64-bit type, which matches both the caller and the use anyway. Fixes: f6f3721244a8 ("iommu/vt-d: Add iotlb flush for nested domain") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240213095832.455245-1-arnd@kernel.org Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index d5af5925a31c6..a7d68f3d518ac 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -103,7 +103,7 @@ static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, } static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, - unsigned long npages, bool ih) + u64 npages, bool ih) { struct iommu_domain_info *info; unsigned int mask; -- GitLab From ecfac05f962f3aa567ae1796b2586a64fb97fe24 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 19 Feb 2024 13:19:40 -0800 Subject: [PATCH 1309/1405] drm/xe: Fix xe_vma_set_pte_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xe_vma_set_pte_size had a return value and did not set the 4k VMA flag. Both of these were incorrect. Fix these. Fixes: c47794bdd63d ("drm/xe: Set max pte size when skipping rebinds") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240219211942.3633795-2-matthew.brost@intel.com (cherry picked from commit 19adaccef8b246182dc89a7470aa7758245efd5d) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 7b00faa672879..0007448aa1898 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2198,7 +2198,7 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) return SZ_1G; /* Uninitialized, used max size */ } -static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) +static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) { switch (size) { case SZ_1G: @@ -2207,9 +2207,10 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size) case SZ_2M: vma->gpuva.flags |= XE_VMA_PTE_2M; break; + case SZ_4K: + vma->gpuva.flags |= XE_VMA_PTE_4K; + break; } - - return SZ_4K; } static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) -- GitLab From 4cf8ffeb6625b7afd97b8d6698f1887071335c32 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 19 Feb 2024 13:19:41 -0800 Subject: [PATCH 1310/1405] drm/xe: Add XE_VMA_PTE_64K VMA flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add XE_VMA_PTE_64K VMA flag to ensure skipping rebinds does not cross 64k page boundaries. Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Fixes: c47794bdd63d ("drm/xe: Set max pte size when skipping rebinds") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240219211942.3633795-3-matthew.brost@intel.com (cherry picked from commit 15f0e0c2c46dddd8ee56d9b3db679fd302cc4b91) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pt.c | 6 ++++-- drivers/gpu/drm/xe/xe_vm.c | 5 +++++ drivers/gpu/drm/xe/xe_vm_types.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ac19bfa3f798c..d237eeb3513d4 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -499,10 +499,12 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, * this device *requires* 64K PTE size for VRAM, fail. */ if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) + if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; pte |= XE_PTE_PS64; - else if (XE_WARN_ON(xe_walk->needs_64K)) + } else if (XE_WARN_ON(xe_walk->needs_64K)) { return -EINVAL; + } } ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 0007448aa1898..a0fc3640d3fa1 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2192,6 +2192,8 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) return SZ_1G; else if (vma->gpuva.flags & XE_VMA_PTE_2M) return SZ_2M; + else if (vma->gpuva.flags & XE_VMA_PTE_64K) + return SZ_64K; else if (vma->gpuva.flags & XE_VMA_PTE_4K) return SZ_4K; @@ -2207,6 +2209,9 @@ static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) case SZ_2M: vma->gpuva.flags |= XE_VMA_PTE_2M; break; + case SZ_64K: + vma->gpuva.flags |= XE_VMA_PTE_64K; + break; case SZ_4K: vma->gpuva.flags |= XE_VMA_PTE_4K; break; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 5ac9c5bebabc3..91800ce708450 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -29,6 +29,7 @@ struct xe_vm; #define XE_VMA_PTE_4K (DRM_GPUVA_USERBITS << 5) #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) +#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) /** struct xe_userptr - User pointer */ struct xe_userptr { -- GitLab From 5b672ec3f5e15062b76d280f8a4df15e763f6abe Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 19 Feb 2024 13:19:42 -0800 Subject: [PATCH 1311/1405] drm/xe: Return 2MB page size for compact 64k PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compact 64k PTEs are only intended to be used within a single VMA which covers the entire 2MB range of the compact 64k PTEs. Add XE_VMA_PTE_COMPACT VMA flag to indicate compact 64k PTEs are used and update xe_vma_max_pte_size to return at least 2MB if set. v2: Include missing changes Fixes: 8f33b4f054fc ("drm/xe: Avoid doing rebinds") Fixes: c47794bdd63d ("drm/xe: Set max pte size when skipping rebinds") Reported-by: Paulo Zanoni Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/758 Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240219211942.3633795-4-matthew.brost@intel.com (cherry picked from commit 0f688c0eb63a643ef0568b29b12cefbb23181e1a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pt.c | 5 ++++- drivers/gpu/drm/xe/xe_vm.c | 2 +- drivers/gpu/drm/xe/xe_vm_types.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index d237eeb3513d4..6653c045f3c92 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -547,13 +547,16 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, *child = &xe_child->base; /* - * Prefer the compact pagetable layout for L0 if possible. + * Prefer the compact pagetable layout for L0 if possible. Only + * possible if VMA covers entire 2MB region as compact 64k and + * 4k pages cannot be mixed within a 2MB region. * TODO: Suballocate the pt bo to avoid wasting a lot of * memory. */ if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && covers && xe_pt_scan_64K(addr, next, xe_walk)) { walk->shifts = xe_compact_pt_shifts; + xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; flags |= XE_PDE_64K; xe_child->is_compact = true; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a0fc3640d3fa1..921ca28d49dd9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2190,7 +2190,7 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma) { if (vma->gpuva.flags & XE_VMA_PTE_1G) return SZ_1G; - else if (vma->gpuva.flags & XE_VMA_PTE_2M) + else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) return SZ_2M; else if (vma->gpuva.flags & XE_VMA_PTE_64K) return SZ_64K; diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 91800ce708450..a603cc2eb56b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -30,6 +30,7 @@ struct xe_vm; #define XE_VMA_PTE_2M (DRM_GPUVA_USERBITS << 6) #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) #define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) +#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9) /** struct xe_userptr - User pointer */ struct xe_userptr { -- GitLab From e2941a482a5de088b6dd75a985a76ff486383b7e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 6 Feb 2024 11:27:31 -0800 Subject: [PATCH 1312/1405] drm/xe/xe_gt_idle: Drop redundant newline in name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newline in name is redunant and produces an unnecessary empty line during 'cat name'. Newline is added during sysfs_emit. See '27a1a1e2e47d ("drm/xe: stringify the argument to avoid potential vulnerability")'. v2: Add Fixes tag (Riana) Fixes: 7b076d14f21a ("drm/xe/mtl: Add support to get C6 residency/status of MTL") Reviewed-by: Riana Tauro Signed-off-by: Ashutosh Dixit (cherry picked from commit e5626eb80026c4b63f8682cdeca1456303c65791) Link: https://patchwork.freedesktop.org/patch/msgid/20240206192731.3533608-1-ashutosh.dixit@intel.com Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9358f73368896..9fcae65b64699 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -145,10 +145,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) } if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; } else { - sprintf(gtidle->name, "gt%d-rc\n", gt->info.id); + sprintf(gtidle->name, "gt%d-rc", gt->info.id); gtidle->idle_residency = xe_guc_pc_rc6_residency; } -- GitLab From 8d3a7dfb801d157ac423261d7cd62c33e95375f8 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:31 +0000 Subject: [PATCH 1313/1405] KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() vgic_get_irq() may not return a valid descriptor if there is no ITS that holds a valid translation for the specified INTID. If that is the case, it is safe to silently ignore it and continue processing the LPI pending table. Cc: stable@vger.kernel.org Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index e2764d0ffa9f3..082448de27ed9 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -468,6 +468,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); -- GitLab From 85a71ee9a0700f6c18862ef3b0011ed9dad99aca Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 21 Feb 2024 09:27:32 +0000 Subject: [PATCH 1314/1405] KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler It is possible that an LPI mapped in a different ITS gets unmapped while handling the MOVALL command. If that is the case, there is no state that can be migrated to the destination. Silently ignore it and continue migrating other LPIs. Cc: stable@vger.kernel.org Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 082448de27ed9..28a93074eca17 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1435,6 +1435,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); -- GitLab From 6650d23f3e20ca00482a71a4ef900f0ea776fb15 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 12 Feb 2024 19:35:48 -0800 Subject: [PATCH 1315/1405] drm/xe: Fix modpost warning on xe_mocs kunit module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ make W=1 -j100 M=drivers/gpu/drm/xe MODPOST drivers/gpu/drm/xe/Module.symvers WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpu/drm/xe/tests/xe_mocs_test.o Fix is identical to '1d425066f15f ("drm/xe: Fix modpost warning on kunit modules")'. Fixes: a6a4ea6d7d37 ("drm/xe: Add mocs kunit") Signed-off-by: Ashutosh Dixit Reviewed-by: Rodrigo Vivi (cherry picked from commit bb619d71224ea85ec94e0a83b2bb82ebe7df2a41) Link: https://patchwork.freedesktop.org/patch/msgid/20240213033548.76219-1-ashutosh.dixit@intel.com Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/tests/xe_mocs_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ef56bd517b28c..421b819fd4ba9 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -21,4 +21,5 @@ kunit_test_suite(xe_mocs_test_suite); MODULE_AUTHOR("Intel Corporation"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe_mocs kunit test"); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); -- GitLab From 3b1ae9b71c2a97f848b00fb085a2bd29bddbe8d9 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 19 Feb 2024 18:25:14 +0530 Subject: [PATCH 1316/1405] octeontx2-af: Consider the action set by PF AF reserves MCAM entries for each PF, VF present in the system and populates the entry with DMAC and action with default RSS so that basic packet I/O works. Since PF/VF is not aware of the RSS action installed by AF, AF only fixup the actions of the rules installed by PF/VF with corresponding default RSS action. This worked well for rules installed by PF/VF for features like RX VLAN offload and DMAC filters but rules involving action like drop/forward to queue are also getting modified by AF. Hence fix it by setting the default RSS action only if requested by PF/VF. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e5d6156655ba4..516adb50f9f6b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -415,6 +415,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); -- GitLab From 56667da7399eb19af857e30f41bea89aa6fa812c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Feb 2024 14:12:20 +0000 Subject: [PATCH 1317/1405] net: implement lockless setsockopt(SO_PEEK_OFF) syzbot reported a lockdep violation [1] involving af_unix support of SO_PEEK_OFF. Since SO_PEEK_OFF is inherently not thread safe (it uses a per-socket sk_peek_off field), there is really no point to enforce a pointless thread safety in the kernel. After this patch : - setsockopt(SO_PEEK_OFF) no longer acquires the socket lock. - skb_consume_udp() no longer has to acquire the socket lock. - af_unix no longer needs a special version of sk_set_peek_off(), because it does not lock u->iolock anymore. As a followup, we could replace prot->set_peek_off to be a boolean and avoid an indirect call, since we always use sk_set_peek_off(). [1] WARNING: possible circular locking dependency detected 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Not tainted syz-executor.2/30025 is trying to acquire lock: ffff8880765e7d80 (&u->iolock){+.+.}-{3:3}, at: unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 but task is already holding lock: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline] ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline] ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_UNIX){+.+.}-{0:0}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_nested+0x48/0x100 net/core/sock.c:3524 lock_sock include/net/sock.h:1691 [inline] __unix_dgram_recvmsg+0x1275/0x12c0 net/unix/af_unix.c:2415 sock_recvmsg_nosec+0x18e/0x1d0 net/socket.c:1046 ____sys_recvmsg+0x3c0/0x470 net/socket.c:2801 ___sys_recvmsg net/socket.c:2845 [inline] do_recvmmsg+0x474/0xae0 net/socket.c:2939 __sys_recvmmsg net/socket.c:3018 [inline] __do_sys_recvmmsg net/socket.c:3041 [inline] __se_sys_recvmmsg net/socket.c:3034 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 -> #0 (&u->iolock){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __mutex_lock_common kernel/locking/mutex.c:608 [inline] __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752 unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 sk_setsockopt+0x207e/0x3360 do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307 __sys_setsockopt+0x1ad/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_UNIX); lock(&u->iolock); lock(sk_lock-AF_UNIX); lock(&u->iolock); *** DEADLOCK *** 1 lock held by syz-executor.2/30025: #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline] #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline] #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193 stack backtrace: CPU: 0 PID: 30025 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __mutex_lock_common kernel/locking/mutex.c:608 [inline] __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752 unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 sk_setsockopt+0x207e/0x3360 do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307 __sys_setsockopt+0x1ad/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f78a1c7dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f78a0fde0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007f78a1dac050 RCX: 00007f78a1c7dda9 RDX: 000000000000002a RSI: 0000000000000001 RDI: 0000000000000006 RBP: 00007f78a1cca47a R08: 0000000000000004 R09: 0000000000000000 R10: 0000000020000180 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f78a1dac050 R15: 00007ffe5cd81ae8 Fixes: 859051dd165e ("bpf: Implement cgroup sockaddr hooks for unix sockets") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Daan De Meyer Cc: Kuniyuki Iwashima Cc: Martin KaFai Lau Cc: David Ahern Reviewed-by: Willem de Bruijn Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/core/sock.c | 23 +++++++++++------------ net/ipv4/udp.c | 7 +------ net/unix/af_unix.c | 19 +++---------------- 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 0a7f46c37f0cf..5e78798456fd8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname, */ WRITE_ONCE(sk->sk_txrehash, (u8)val); return 0; + case SO_PEEK_OFF: + { + int (*set_peek_off)(struct sock *sk, int val); + + set_peek_off = READ_ONCE(sock->ops)->set_peek_off; + if (set_peek_off) + ret = set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + return ret; + } } sockopt_lock_sock(sk); @@ -1430,18 +1441,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; - case SO_PEEK_OFF: - { - int (*set_peek_off)(struct sock *sk, int val); - - set_peek_off = READ_ONCE(sock->ops)->set_peek_off; - if (set_peek_off) - ret = set_peek_off(sk, val); - else - ret = -EOPNOTSUPP; - break; - } - case SO_NOFCS: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f631b0a21af4c..e474b201900f9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk) void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { - bool slow = lock_sock_fast(sk); - - sk_peek_offset_bwd(sk, len); - unlock_sock_fast(sk, slow); - } + sk_peek_offset_bwd(sk, len); if (!skb_unref(skb)) return; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 30b178ebba60a..0748e7ea5210e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_set_peek_off(struct sock *sk, int val) -{ - struct unix_sock *u = unix_sk(sk); - - if (mutex_lock_interruptible(&u->iolock)) - return -EINTR; - - WRITE_ONCE(sk->sk_peek_off, val); - mutex_unlock(&u->iolock); - - return 0; -} - #ifdef CONFIG_PROC_FS static int unix_count_nr_fds(struct sock *sk) { @@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = { .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .splice_read = unix_stream_splice_read, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; -- GitLab From d80f8e96d47d7374794a30fbed69be43f3388afc Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 19 Feb 2024 08:40:15 -0600 Subject: [PATCH 1318/1405] net: ipa: don't overrun IPA suspend interrupt registers In newer hardware, IPA supports more than 32 endpoints. Some registers--such as IPA interrupt registers--represent endpoints as bits in a 4-byte register, and such registers are repeated as needed to represent endpoints beyond the first 32. In ipa_interrupt_suspend_clear_all(), we clear all pending IPA suspend interrupts by reading all status register(s) and writing corresponding registers to clear interrupt conditions. Unfortunately the number of registers to read/write is calculated incorrectly, and as a result we access *many* more registers than intended. This bug occurs only when the IPA hardware signals a SUSPEND interrupt, which happens when a packet is received for an endpoint (or its underlying GSI channel) that is suspended. This situation is difficult to reproduce, but possible. Fix this by correctly computing the number of interrupt registers to read and write. This is the only place in the code where registers that map endpoints or channels this way perform this calculation. Fixes: f298ba785e2d ("net: ipa: add a parameter to suspend registers") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa_interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4bc05948f772d..a78c692f2d3c5 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -212,7 +212,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) u32 unit_count; u32 unit; - unit_count = roundup(ipa->endpoint_count, 32); + unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32); for (unit = 0; unit < unit_count; unit++) { const struct reg *reg; u32 val; -- GitLab From aa82ac51d63328714645c827775d64dbfd9941f3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 Feb 2024 09:46:57 -0800 Subject: [PATCH 1319/1405] af_unix: Drop oob_skb ref before purging queue in GC. syzbot reported another task hung in __unix_gc(). [0] The current while loop assumes that all of the left candidates have oob_skb and calling kfree_skb(oob_skb) releases the remaining candidates. However, I missed a case that oob_skb has self-referencing fd and another fd and the latter sk is placed before the former in the candidate list. Then, the while loop never proceeds, resulting the task hung. __unix_gc() has the same loop just before purging the collected skb, so we can call kfree_skb(oob_skb) there and let __skb_queue_purge() release all inflight sockets. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 2784 Comm: kworker/u4:8 Not tainted 6.8.0-rc4-syzkaller-01028-g71b605d32017 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x70 kernel/kcov.c:200 Code: 89 fb e8 23 00 00 00 48 8b 3d 84 f5 1a 0c 48 89 de 5b e9 43 26 57 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1e fa 48 8b 04 24 65 48 8b 0d 90 52 70 7e 65 8b 15 91 52 70 RSP: 0018:ffffc9000a17fa78 EFLAGS: 00000287 RAX: ffffffff8a0a6108 RBX: ffff88802b6c2640 RCX: ffff88802c0b3b80 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffffc9000a17fbf0 R08: ffffffff89383f1d R09: 1ffff1100ee5ff84 R10: dffffc0000000000 R11: ffffed100ee5ff85 R12: 1ffff110056d84ee R13: ffffc9000a17fae0 R14: 0000000000000000 R15: ffffffff8f47b840 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffef5687ff8 CR3: 0000000029b34000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __unix_gc+0xe69/0xf40 net/unix/garbage.c:343 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Reported-and-tested-by: syzbot+ecab4d36f920c3574bf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ecab4d36f920c3574bf9 Fixes: 25236c91b5ab ("af_unix: Fix task hung while purging oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/unix/garbage.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2ff7ddbaa782e..2a81880dac7b7 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -314,18 +322,6 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - while (!list_empty(&gc_candidates)) { - u = list_entry(gc_candidates.next, struct unix_sock, link); - if (u->oob_skb) { - struct sk_buff *skb = u->oob_skb; - - u->oob_skb = NULL; - kfree_skb(skb); - } - } -#endif - spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to -- GitLab From 14dec56fdd4c70a0ebe40077368e367421ea6fef Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 19 Feb 2024 17:55:31 +0000 Subject: [PATCH 1320/1405] MAINTAINERS: Add framer headers to NETWORKING [GENERAL] The cited commit [1] added framer support under drivers/net/wan, which is covered by NETWORKING [GENERAL]. And it is implied that framer-provider.h and framer.h, which were also added buy the same patch, are also maintained as part of NETWORKING [GENERAL]. Make this explicit by adding these files to the corresponding section in MAINTAINERS. [1] 82c944d05b1a ("net: wan: Add framer framework support") Signed-off-by: Simon Horman Reviewed-by: Herve Codina Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a0697e2fb8e8b..466a0fc46f76e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15237,6 +15237,8 @@ F: Documentation/networking/ F: Documentation/networking/net_cachelines/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ +F: include/linux/framer/framer-provider.h +F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h -- GitLab From ed683b9bb91fc274383e222ba5873a9ee9033462 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 20 Feb 2024 10:54:12 +0100 Subject: [PATCH 1321/1405] sparc: Fix undefined reference to fb_is_primary_device Commit 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols") added a new FB_CORE Kconfig symbol, that can be enabled to only have fbcon/VT and DRM fbdev emulation, but without support for any legacy fbdev driver. Unfortunately, it missed to change the CONFIG_FB in arch/sparc makefiles, which leads to the following linking error in some sparc64 configurations: sparc64-linux-ld: drivers/video/fbdev/core/fbcon.o: in function `fbcon_fb_registered': >> fbcon.c:(.text+0x4f60): undefined reference to `fb_is_primary_device' Fixes: 55bffc8170bb ("fbdev: Split frame buffer support in FB and FB_CORE symbols") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202401290306.IV8rhJ02-lkp@intel.com/ Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Acked-by: Arnd Bergmann Cc: # v6.6+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240220095428.3341195-1-javierm@redhat.com --- arch/sparc/Makefile | 2 +- arch/sparc/video/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 5f60359361312..2a03daa68f285 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -60,7 +60,7 @@ libs-y += arch/sparc/prom/ libs-y += arch/sparc/lib/ drivers-$(CONFIG_PM) += arch/sparc/power/ -drivers-$(CONFIG_FB) += arch/sparc/video/ +drivers-$(CONFIG_FB_CORE) += arch/sparc/video/ boot := arch/sparc/boot diff --git a/arch/sparc/video/Makefile b/arch/sparc/video/Makefile index 6baddbd58e4db..d4d83f1702c61 100644 --- a/arch/sparc/video/Makefile +++ b/arch/sparc/video/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_FB) += fbdev.o +obj-$(CONFIG_FB_CORE) += fbdev.o -- GitLab From 7adc0c1cfa7732b81bf7bf2ed16ffb99719ceebf Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 20 Feb 2024 14:43:54 -0400 Subject: [PATCH 1322/1405] iommufd: Reject non-zero data_type if no data_len is provided Since the current design doesn't forward the data_type to the driver to check unless there is a data_len/uptr for a driver specific struct we should check and ensure that data_type is 0 if data_len is 0. Otherwise any value is permitted. Fixes: bd529dbb661d ("iommufd: Add a nested HW pagetable object") Link: https://lore.kernel.org/r/0-v1-9b1ea6869554+110c60-iommufd_ck_data_type_jgg@nvidia.com Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/hw_pagetable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 3f3f1fa1a0a94..33d142f8057d7 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -263,7 +263,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) if (cmd->__reserved) return -EOPNOTSUPP; - if (cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) + if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) || + (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len)) return -EINVAL; idev = iommufd_get_device(ucmd, cmd->dev_id); -- GitLab From 723a2cc8d69d4342b47dfddbfe6c19f1b135f09b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Feb 2024 20:48:14 -0400 Subject: [PATCH 1323/1405] s390: use the correct count for __iowrite64_copy() The signature for __iowrite64_copy() requires the number of 64 bit quantities, not bytes. Multiple by 8 to get to a byte length before invoking zpci_memcpy_toio() Fixes: 87bc359b9822 ("s390/pci: speed up __iowrite64_copy by using pci store block insn") Acked-by: Niklas Schnelle Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v1-9223d11a7662+1d7785-s390_iowrite64_jgg@nvidia.com Signed-off-by: Heiko Carstens --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 676ac74026a82..52a44e353796c 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -252,7 +252,7 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, /* combine single writes by using store-block insn */ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) { - zpci_memcpy_toio(to, from, count); + zpci_memcpy_toio(to, from, count * 8); } void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, -- GitLab From e78fb4eac817308027da88d02e5d0213462a7562 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 20 Feb 2024 09:51:12 -0500 Subject: [PATCH 1324/1405] ring-buffer: Do not let subbuf be bigger than write mask The data on the subbuffer is measured by a write variable that also contains status flags. The counter is just 20 bits in length. If the subbuffer is bigger than then counter, it will fail. Make sure that the subbuffer can not be set to greater than the counter that keeps track of the data on the subbuffer. Link: https://lore.kernel.org/linux-trace-kernel/20240220095112.77e9cb81@gandalf.local.home Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 2808e31ec12e5 ("ring-buffer: Add interface for configuring trace sub buffer size") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fd4bfe3ecf014..0699027b4f4c9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5877,6 +5877,10 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) if (psize <= BUF_PAGE_HDR_SIZE) return -EINVAL; + /* Size of a subbuf cannot be greater than the write counter */ + if (psize > RB_WRITE_MASK + 1) + return -EINVAL; + old_order = buffer->subbuf_order; old_size = buffer->subbuf_size; -- GitLab From b820de741ae48ccf50dd95e297889c286ff4f760 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:38 -0800 Subject: [PATCH 1325/1405] fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio If kiocb_set_cancel_fn() is called for I/O submitted via io_uring, the following kernel warning appears: WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8 Call trace: kiocb_set_cancel_fn+0x9c/0xa8 ffs_epfile_read_iter+0x144/0x1d0 io_read+0x19c/0x498 io_issue_sqe+0x118/0x27c io_submit_sqes+0x25c/0x5fc __arm64_sys_io_uring_enter+0x104/0xab0 invoke_syscall+0x58/0x11c el0_svc_common+0xb4/0xf4 do_el0_svc+0x2c/0xb0 el0_svc+0x2c/0xa4 el0t_64_sync_handler+0x68/0xb4 el0t_64_sync+0x1a4/0x1a8 Fix this by setting the IOCB_AIO_RW flag for read and write I/O that is submitted by libaio. Suggested-by: Jens Axboe Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-2-bvanassche@acm.org Signed-off-by: Christian Brauner --- fs/aio.c | 9 ++++++++- include/linux/fs.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index bb2ff48991f35..da18dbcfcb220 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -593,6 +593,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) struct kioctx *ctx = req->ki_ctx; unsigned long flags; + /* + * kiocb didn't come from aio or is neither a read nor a write, hence + * ignore it. + */ + if (!(iocb->ki_flags & IOCB_AIO_RW)) + return; + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; @@ -1509,7 +1516,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) req->ki_complete = aio_complete_rw; req->private = NULL; req->ki_pos = iocb->aio_offset; - req->ki_flags = req->ki_filp->f_iocb_flags; + req->ki_flags = req->ki_filp->f_iocb_flags | IOCB_AIO_RW; if (iocb->aio_flags & IOCB_FLAG_RESFD) req->ki_flags |= IOCB_EVENTFD; if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a704951..c2dcc98cb4c8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -352,6 +352,8 @@ enum rw_hint { * unrelated IO (like cache flushing, new IO generation, etc). */ #define IOCB_DIO_CALLER_COMP (1 << 22) +/* kiocb is a read or write operation submitted by fs/aio.c. */ +#define IOCB_AIO_RW (1 << 23) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ -- GitLab From 4cd12c6065dfcdeba10f49949bffcf383b3952d8 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Mon, 19 Feb 2024 00:09:33 +0900 Subject: [PATCH 1326/1405] bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready() syzbot reported the following NULL pointer dereference issue [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:0x0 [...] Call Trace: sk_psock_verdict_data_ready+0x232/0x340 net/core/skmsg.c:1230 unix_stream_sendmsg+0x9b4/0x1230 net/unix/af_unix.c:2293 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 If sk_psock_verdict_data_ready() and sk_psock_stop_verdict() are called concurrently, psock->saved_data_ready can be NULL, causing the above issue. This patch fixes this issue by calling the appropriate data ready function using the sk_psock_data_ready() helper and protecting it from concurrency with sk->sk_callback_lock. Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Daniel Borkmann Tested-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Acked-by: John Fastabend Closes: https://syzkaller.appspot.com/bug?extid=fd7b34375c1c8ce29c93 [1] Link: https://lore.kernel.org/bpf/20240218150933.6004-1-syoshida@redhat.com --- net/core/skmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93ecfceac1bc4..4d75ef9d24bfa 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } -- GitLab From 9bd405c48b0ac4de087c0c4440fd79597201b8a7 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Sat, 3 Feb 2024 21:26:40 +0000 Subject: [PATCH 1327/1405] cache: ax45mp_cache: Align end size to cache boundary in ax45mp_dma_cache_wback() Align the end size to cache boundary size in ax45mp_dma_cache_wback() callback likewise done in ax45mp_dma_cache_inv() callback. Additionally return early in case of start == end. Fixes: d34599bcd2e4 ("cache: Add L2 cache management for Andes AX45MP RISC-V core") Reported-by: Pavel Machek Link: https://lore.kernel.org/cip-dev/ZYsdKDiw7G+kxQ3m@duo.ucw.cz/ Signed-off-by: Lad Prabhakar Signed-off-by: Conor Dooley --- drivers/cache/ax45mp_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c index 57186c58dc849..1d7dd3d2c101c 100644 --- a/drivers/cache/ax45mp_cache.c +++ b/drivers/cache/ax45mp_cache.c @@ -129,8 +129,12 @@ static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size) unsigned long line_size; unsigned long flags; + if (unlikely(start == end)) + return; + line_size = ax45mp_priv.ax45mp_cache_line_size; start = start & (~(line_size - 1)); + end = ((end + line_size - 1) & (~(line_size - 1))); local_irq_save(flags); ax45mp_cpu_dcache_wb_range(start, end); local_irq_restore(flags); -- GitLab From fb33a46cd75e18773dd5a414744507d84ae90870 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Tue, 20 Feb 2024 19:14:29 +0800 Subject: [PATCH 1328/1405] irqchip/mbigen: Don't use bus_get_dev_root() to find the parent bus_get_dev_root() returns sp->dev_root which is set in subsys_register(), but subsys_register() is not called by platform_bus_init(). Therefor for the platform_bus_type, bus_get_dev_root() always returns NULL. This makes mbigen_of_create_domain() always return -ENODEV. Don't try to retrieve the parent via bus_get_dev_root() and unconditionally hand a NULL pointer to of_platform_device_create() to fix this. Fixes: fea087fc291b ("irqchip/mbigen: move to use bus_get_dev_root()") Signed-off-by: Chen Jun Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240220111429.110666-1-chenjun102@huawei.com --- drivers/irqchip/irq-mbigen.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 5101a3fb11df5..58881d3139792 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -235,22 +235,17 @@ static const struct irq_domain_ops mbigen_domain_ops = { static int mbigen_of_create_domain(struct platform_device *pdev, struct mbigen_device *mgn_chip) { - struct device *parent; struct platform_device *child; struct irq_domain *domain; struct device_node *np; u32 num_pins; int ret = 0; - parent = bus_get_dev_root(&platform_bus_type); - if (!parent) - return -ENODEV; - for_each_child_of_node(pdev->dev.of_node, np) { if (!of_property_read_bool(np, "interrupt-controller")) continue; - child = of_platform_device_create(np, NULL, parent); + child = of_platform_device_create(np, NULL, NULL); if (!child) { ret = -ENOMEM; break; @@ -273,7 +268,6 @@ static int mbigen_of_create_domain(struct platform_device *pdev, } } - put_device(parent); if (ret) of_node_put(np); -- GitLab From 9cec467d0502b24660f413a0e8fc782903b46d5b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 19 Feb 2024 16:44:30 +0100 Subject: [PATCH 1329/1405] ata: libata-core: Do not call ata_dev_power_set_standby() twice For regular system shutdown, ata_dev_power_set_standby() will be executed twice: once the scsi device is removed and another when ata_pci_shutdown_one() executes and EH completes unloading the devices. Make the second call to ata_dev_power_set_standby() do nothing by using ata_dev_power_is_active() and return if the device is already in standby. Fixes: 2da4c5e24e86 ("ata: libata-core: Improve ata_dev_power_set_active()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 59 ++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d9f80f4f70f5b..be3412cdb22e7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2001,6 +2001,33 @@ bool ata_dev_power_init_tf(struct ata_device *dev, struct ata_taskfile *tf, return true; } +static bool ata_dev_power_is_active(struct ata_device *dev) +{ + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol = ATA_PROT_NODATA; + tf.command = ATA_CMD_CHK_POWER; + + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (err_mask) { + ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n", + err_mask); + /* + * Assume we are in standby mode so that we always force a + * spinup in ata_dev_power_set_active(). + */ + return false; + } + + ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect); + + /* Active or idle */ + return tf.nsect == 0xff; +} + /** * ata_dev_power_set_standby - Set a device power mode to standby * @dev: target device @@ -2017,8 +2044,9 @@ void ata_dev_power_set_standby(struct ata_device *dev) struct ata_taskfile tf; unsigned int err_mask; - /* If the device is already sleeping, do nothing. */ - if (dev->flags & ATA_DFLAG_SLEEPING) + /* If the device is already sleeping or in standby, do nothing. */ + if ((dev->flags & ATA_DFLAG_SLEEPING) || + !ata_dev_power_is_active(dev)) return; /* @@ -2046,33 +2074,6 @@ void ata_dev_power_set_standby(struct ata_device *dev) err_mask); } -static bool ata_dev_power_is_active(struct ata_device *dev) -{ - struct ata_taskfile tf; - unsigned int err_mask; - - ata_tf_init(dev, &tf); - tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; - tf.protocol = ATA_PROT_NODATA; - tf.command = ATA_CMD_CHK_POWER; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - if (err_mask) { - ata_dev_err(dev, "Check power mode failed (err_mask=0x%x)\n", - err_mask); - /* - * Assume we are in standby mode so that we always force a - * spinup in ata_dev_power_set_active(). - */ - return false; - } - - ata_dev_dbg(dev, "Power mode: 0x%02x\n", tf.nsect); - - /* Active or idle */ - return tf.nsect == 0xff; -} - /** * ata_dev_power_set_active - Set a device power mode to active * @dev: target device -- GitLab From ec4308ecfc887128a468f03fb66b767559c57c23 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Mon, 19 Feb 2024 18:58:06 +0000 Subject: [PATCH 1330/1405] irqchip/gic-v3-its: Do not assume vPE tables are preallocated The GIC/ITS code is designed to ensure to pick up any preallocated LPI tables on the redistributors, as enabling LPIs is a one-way switch. There is no such restriction for vLPIs, and for GICv4.1 it is expected to allocate a new vPE table at boot. This works as intended when initializing an ITS, however when setting up a redistributor in cpu_init_lpis() the early return for preallocated RD tables skips straight past the GICv4 setup. This all comes to a head when trying to kexec() into a new kernel, as the new kernel silently fails to set up GICv4, leading to a complete loss of SGIs and LPIs for KVM VMs. Slap a band-aid on the problem by ensuring its_cpu_init_lpis() always initializes GICv4 on the way out, even if the other RD tables were preallocated. Fixes: 6479450f72c1 ("irqchip/gic-v4: Fix occasional VLPI drop") Reported-by: George Cherian Co-developed-by: Marc Zyngier Signed-off-by: Marc Zyngier Signed-off-by: Oliver Upton Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240219185809.286724-2-oliver.upton@linux.dev --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 53abd4779914d..b822752c42617 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3181,6 +3181,7 @@ static void its_cpu_init_lpis(void) val |= GICR_CTLR_ENABLE_LPIS; writel_relaxed(val, rbase + GICR_CTLR); +out: if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); @@ -3216,7 +3217,6 @@ static void its_cpu_init_lpis(void) /* Make sure the GIC has seen the above */ dsb(sy); -out: gic_data_rdist()->flags |= RD_LOCAL_LPI_ENABLED; pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", smp_processor_id(), -- GitLab From b7b2ffc3ca59b06397550f96febe95f3f153eb1e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 15 Feb 2024 07:41:09 +0100 Subject: [PATCH 1331/1405] docs: translations: use attribute to store current language Akira Yokosawa reported [1] that the "translations" extension we added in commit 7418ec5b151f ("docs: translations: add translations links when they exist") broke the build on Sphinx versions v6.1.3 through 7.1.2 (possibly others) with the following error: Exception occurred: File "/usr/lib/python3.12/site-packages/sphinx/util/nodes.py", line 624, in _copy_except__document newnode = self.__class__(rawsource=self.rawsource, **self.attributes) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TypeError: LanguagesNode.__init__() missing 1 required positional argument: 'current_language' The full traceback has been saved in /tmp/sphinx-err-7xmwytuu.log, if you want to report the issue to the developers. Solve this problem by making 'current_language' a true element attribute of the LanguagesNode element, which is probably the more correct way to do it anyway. Tested on Sphinx 2.x, 3.x, 6.x, and 7.x. [1]: https://lore.kernel.org/all/54a56c2e-a27c-45a0-b712-02a7bc7d2673@gmail.com/ Fixes: 7418ec5b151f ("docs: translations: add translations links when they exist") Reported-by: Akira Yokosawa Signed-off-by: Vegard Nossum Closes: https://lore.kernel.org/all/54a56c2e-a27c-45a0-b712-02a7bc7d2673@gmail.com/ Tested-by: Akira Yokosawa # Sphinx 4.3.2, 5.3.0 and 6.2.1 Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20240215064109.1193556-1-vegard.nossum@oracle.com --- Documentation/sphinx/translations.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Documentation/sphinx/translations.py b/Documentation/sphinx/translations.py index 47161e6eba997..32c2b32b2b5ee 100644 --- a/Documentation/sphinx/translations.py +++ b/Documentation/sphinx/translations.py @@ -29,10 +29,7 @@ all_languages = { } class LanguagesNode(nodes.Element): - def __init__(self, current_language, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.current_language = current_language + pass class TranslationsTransform(Transform): default_priority = 900 @@ -49,7 +46,8 @@ class TranslationsTransform(Transform): # normalize docname to be the untranslated one docname = os.path.join(*components[2:]) - new_nodes = LanguagesNode(all_languages[this_lang_code]) + new_nodes = LanguagesNode() + new_nodes['current_language'] = all_languages[this_lang_code] for lang_code, lang_name in all_languages.items(): if lang_code == this_lang_code: @@ -84,7 +82,7 @@ def process_languages(app, doctree, docname): html_content = app.builder.templates.render('translations.html', context={ - 'current_language': node.current_language, + 'current_language': node['current_language'], 'languages': languages, }) -- GitLab From d56e460e19ea8382f813eb489730248ec8d7eb73 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 21 Feb 2024 06:01:20 -0800 Subject: [PATCH 1332/1405] hwmon: (nct6775) Fix access to temperature configuration registers The number of temperature configuration registers does not always match the total number of temperature registers. This can result in access errors reported if KASAN is enabled. BUG: KASAN: global-out-of-bounds in nct6775_probe+0x5654/0x6fe9 nct6775_core Reported-by: Erhard Furtner Closes: https://lore.kernel.org/linux-hwmon/d51181d1-d26b-42b2-b002-3f5a4037721f@roeck-us.net/ Fixes: b7f1f7b2523a ("hwmon: (nct6775) Additional TEMP registers for nct6799") Cc: Ahmad Khalifa Tested-by: Ahmad Khalifa Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775-core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 8d2ef3145bca3..9fbab8f023340 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -3512,6 +3512,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, const u16 *reg_temp_mon, *reg_temp_alternate, *reg_temp_crit; const u16 *reg_temp_crit_l = NULL, *reg_temp_crit_h = NULL; int num_reg_temp, num_reg_temp_mon, num_reg_tsi_temp; + int num_reg_temp_config; struct device *hwmon_dev; struct sensor_template_group tsi_temp_tg; @@ -3594,6 +3595,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3669,6 +3671,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6106_REG_TEMP_OVER; reg_temp_hyst = NCT6106_REG_TEMP_HYST; reg_temp_config = NCT6106_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6106_REG_TEMP_CONFIG); reg_temp_alternate = NCT6106_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6106_REG_TEMP_CRIT; reg_temp_crit_l = NCT6106_REG_TEMP_CRIT_L; @@ -3746,6 +3749,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6775_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6775_REG_TEMP_CONFIG); reg_temp_alternate = NCT6775_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6775_REG_TEMP_CRIT; @@ -3821,6 +3825,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6775_REG_TEMP_OVER; reg_temp_hyst = NCT6775_REG_TEMP_HYST; reg_temp_config = NCT6776_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6776_REG_TEMP_CONFIG); reg_temp_alternate = NCT6776_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6776_REG_TEMP_CRIT; @@ -3900,6 +3905,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4034,6 +4040,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6779_REG_TEMP_OVER; reg_temp_hyst = NCT6779_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6779_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6779_REG_TEMP_CRIT; @@ -4123,6 +4130,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, reg_temp_over = NCT6798_REG_TEMP_OVER; reg_temp_hyst = NCT6798_REG_TEMP_HYST; reg_temp_config = NCT6779_REG_TEMP_CONFIG; + num_reg_temp_config = ARRAY_SIZE(NCT6779_REG_TEMP_CONFIG); reg_temp_alternate = NCT6798_REG_TEMP_ALTERNATE; reg_temp_crit = NCT6798_REG_TEMP_CRIT; @@ -4204,7 +4212,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, = reg_temp_crit[src - 1]; if (reg_temp_crit_l && reg_temp_crit_l[i]) data->reg_temp[4][src - 1] = reg_temp_crit_l[i]; - data->reg_temp_config[src - 1] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[src - 1] = reg_temp_config[i]; data->temp_src[src - 1] = src; continue; } @@ -4217,7 +4226,8 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data, data->reg_temp[0][s] = reg_temp[i]; data->reg_temp[1][s] = reg_temp_over[i]; data->reg_temp[2][s] = reg_temp_hyst[i]; - data->reg_temp_config[s] = reg_temp_config[i]; + if (i < num_reg_temp_config) + data->reg_temp_config[s] = reg_temp_config[i]; if (reg_temp_crit_h && reg_temp_crit_h[i]) data->reg_temp[3][s] = reg_temp_crit_h[i]; else if (reg_temp_crit[src - 1]) -- GitLab From 136cfaca22567a03bbb3bf53a43d8cb5748b80ec Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Wed, 14 Feb 2024 19:27:33 +0300 Subject: [PATCH 1333/1405] gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 RSP: 0018:ffff888014107220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: ? show_regs+0x90/0xa0 ? die_addr+0x50/0xd0 ? exc_general_protection+0x148/0x220 ? asm_exc_general_protection+0x22/0x30 ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] ? __alloc_skb+0x1dd/0x350 ? __pfx___alloc_skb+0x10/0x10 genl_dumpit+0x11d/0x230 netlink_dump+0x5b9/0xce0 ? lockdep_hardirqs_on_prepare+0x253/0x430 ? __pfx_netlink_dump+0x10/0x10 ? kasan_save_track+0x10/0x40 ? __kasan_kmalloc+0x9b/0xa0 ? genl_start+0x675/0x970 __netlink_dump_start+0x6fc/0x9f0 genl_family_rcv_msg_dumpit+0x1bb/0x2d0 ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 ? genl_op_from_small+0x2a/0x440 ? cap_capable+0x1d0/0x240 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 ? security_capable+0x9d/0xe0 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b1919278e931f..2129ae42c7030 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1907,20 +1907,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: -- GitLab From 10f41d0710fc81b7af93fa6106678d57b1ff24a7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 1334/1405] tls: break out of main loop when PEEK gets a non-data record PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and make it unlikely to not penalize the async fastpath. Async decrypt only applies to data record, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9fbc70200cd0f..78aedfc682ba8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2064,6 +2064,8 @@ int tls_sw_recvmsg(struct sock *sk, decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } -- GitLab From fdfbaec5923d9359698cbb286bc0deadbb717504 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 1335/1405] tls: stop recv() if initial process_rx_list gave us non-DATA If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 78aedfc682ba8..43dd0d82b6ed7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1971,7 +1971,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); -- GitLab From ec823bf3a479d42c589dc0f28ef4951c49cd2d2a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 1336/1405] tls: don't skip over different type records from the rx_list If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 43dd0d82b6ed7..de96959336c48 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1772,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1785,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1803,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1844,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1947,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1966,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2130,10 +2136,10 @@ int tls_sw_recvmsg(struct sock *sk, /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; -- GitLab From 7b2a4c2a623a9f9c5fd9cff499bb9457fa2192ec Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:32 +0100 Subject: [PATCH 1337/1405] selftests: tls: add test for merging of same-type control messages Two consecutive control messages of the same type should never be merged into one large received blob of data. Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/018f1633d5471684c65def5fe390de3b15c3d683.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 49c84602707f8..2714c230a0f9f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1485,6 +1485,32 @@ TEST_F(tls, control_msg) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; -- GitLab From 2bf6172632e1d4a6ec7ee7e734d53a43a145f5c6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:33 +0100 Subject: [PATCH 1338/1405] selftests: tls: add test for peeking past a record of a different type If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA the current code can look past the 2nd record and merge the 2 data records. Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/4623550f8617c239581030c13402d3262f2bd14f.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2714c230a0f9f..b95c249f81c25 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1511,6 +1511,25 @@ TEST_F(tls, control_msg_nomerge) EXPECT_EQ(memcmp(buf, rec2, send_len), 0); } +TEST_F(tls, data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; -- GitLab From bccebf64701735533c8db37773eeacc6566cc8ec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 19 Feb 2024 16:58:04 +0100 Subject: [PATCH 1339/1405] netfilter: nf_tables: set dormant flag on hook register failure We need to set the dormant flag again if we fail to register the hooks. During memory pressure hook registration can fail and we end up with a table marked as active but no registered hooks. On table/base chain deletion, nf_tables will attempt to unregister the hook again which yields a warn splat from the nftables core. Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f8e3f70c35bd5..90038d778f373 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1251,6 +1251,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } -- GitLab From 9e0f0430389be7696396c62f037be4bf72cf93e3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: [PATCH 1340/1405] netfilter: nft_flow_offload: reset dst in route object after setting up flow dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 956c752ceb318..a763dd327c6ea 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -276,7 +276,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 920a5a29ae1dc..7502d6d73a600 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -146,7 +156,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); -- GitLab From 8762785f459be1cfe6fcf7285c123aad6a3703f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 20 Feb 2024 21:36:39 +0100 Subject: [PATCH 1341/1405] netfilter: nft_flow_offload: release dst in case direct xmit path is used Direct xmit does not use it since it calls dev_queue_xmit() to send packets, hence it calls dst_release(). kmemleak reports: unreferenced object 0xffff88814f440900 (size 184): comm "softirq", pid 0, jiffies 4294951896 hex dump (first 32 bytes): 00 60 5b 04 81 88 ff ff 00 e6 e8 82 ff ff ff ff .`[............. 21 0b 50 82 ff ff ff ff 00 00 00 00 00 00 00 00 !.P............. backtrace (crc cb2bf5d6): [<000000003ee17107>] kmem_cache_alloc+0x286/0x340 [<0000000021a5de2c>] dst_alloc+0x43/0xb0 [<00000000f0671159>] rt_dst_alloc+0x2e/0x190 [<00000000fe5092c9>] __mkroute_output+0x244/0x980 [<000000005fb96fb0>] ip_route_output_flow+0xc0/0x160 [<0000000045367433>] nf_ip_route+0xf/0x30 [<0000000085da1d8e>] nf_route+0x2d/0x60 [<00000000d1ecd1cb>] nft_flow_route+0x171/0x6a0 [nft_flow_offload] [<00000000d9b2fb60>] nft_flow_offload_eval+0x4e8/0x700 [nft_flow_offload] [<000000009f447dbb>] expr_call_ops_eval+0x53/0x330 [nf_tables] [<00000000072e1be6>] nft_do_chain+0x17c/0x840 [nf_tables] [<00000000d0551029>] nft_do_chain_inet+0xa1/0x210 [nf_tables] [<0000000097c9d5c6>] nf_hook_slow+0x5b/0x160 [<0000000005eccab1>] ip_forward+0x8b6/0x9b0 [<00000000553a269b>] ip_rcv+0x221/0x230 [<00000000412872e5>] __netif_receive_skb_one_core+0xfe/0x110 Fixes: fa502c865666 ("netfilter: flowtable: simplify route logic") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7502d6d73a600..a0571339239c4 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: -- GitLab From d472e9853d7b46a6b094224d131d09ccd3a03daf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Feb 2024 19:43:53 +0100 Subject: [PATCH 1342/1405] netfilter: nf_tables: register hooks last when adding new chain/flowtable Register hooks last when adding chain/flowtable to ensure that packets do not walk over datastructure that is being released in the error path without waiting for the rcu grace period. Fixes: 91c7b38dc9f0 ("netfilter: nf_tables: use new transaction infrastructure to handle chain") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 78 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 90038d778f373..485e047d5f98b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -701,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -2504,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2524,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -8456,9 +8458,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8538,34 +8540,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, extack, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); -- GitLab From 195e5f88c2e48330ba5483e0bad2de3b3fad484f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Feb 2024 18:38:45 +0100 Subject: [PATCH 1343/1405] netfilter: nf_tables: use kzalloc for hook allocation KMSAN reports unitialized variable when registering the hook, reg->hook_ops_type == NF_HOOK_OP_BPF) ~~~~~~~~~~~ undefined This is a small structure, just use kzalloc to make sure this won't happen again when new fields get added to nf_hook_ops. Fixes: 7b4b2fa37587 ("netfilter: annotate nf_tables base hook ops") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 485e047d5f98b..7e938c7397dda 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2082,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; -- GitLab From 9990889be14288d4f1743e4768222d5032a79c27 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 15 Feb 2024 15:53:08 +0800 Subject: [PATCH 1344/1405] net: mctp: put sock on tag allocation failure We may hold an extra reference on a socket if a tag allocation fails: we optimistically allocate the sk_key, and take a ref there, but do not drop if we end up not using the allocated key. Ensure we're dropping the sock on this failure by doing a proper unref rather than directly kfree()ing. Fixes: de8a6b15d965 ("net: mctp: add an explicit reference from a mctp_sk_key to sock") Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ce9b61e44d1cdae7797be0c5e3141baf582d23a0.1707983487.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa54b4..6218dcd07e184 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } -- GitLab From e4fe082c38cd74a8fa384bc7542cf3edf1cb7318 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Feb 2024 08:11:11 -0800 Subject: [PATCH 1345/1405] tools: ynl: make sure we always pass yarg to mnl_cb_run There is one common error handler in ynl - ynl_cb_error(). It expects priv to be a pointer to struct ynl_parse_arg AKA yarg. To avoid potential crashes if we encounter a stray NLMSG_ERROR always pass yarg as priv (or a struct which has it as the first member). ynl_cb_null() has a similar problem directly - it expects yarg but priv passed by the caller is ys. Found by code inspection. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240220161112.2735195-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index c82a7f41b31c5..9e41c8c0cc99f 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -466,6 +466,8 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) int ynl_recv_ack(struct ynl_sock *ys, int ret) { + struct ynl_parse_arg yarg = { .ys = ys, }; + if (!ret) { yerr(ys, YNL_ERROR_EXPECT_ACK, "Expecting an ACK but nothing received"); @@ -478,7 +480,7 @@ int ynl_recv_ack(struct ynl_sock *ys, int ret) return ret; } return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, - ynl_cb_null, ys); + ynl_cb_null, &yarg); } int ynl_cb_null(const struct nlmsghdr *nlh, void *data) @@ -741,11 +743,14 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) { - return ynl_ntf_parse((struct ynl_sock *)data, nlh); + struct ynl_parse_arg *yarg = data; + + return ynl_ntf_parse(yarg->ys, nlh); } int ynl_ntf_check(struct ynl_sock *ys) { + struct ynl_parse_arg yarg = { .ys = ys, }; ssize_t len; int err; @@ -767,7 +772,7 @@ int ynl_ntf_check(struct ynl_sock *ys) return len; err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_ntf_trampoline, ys, + ynl_ntf_trampoline, &yarg, ynl_cb_array, NLMSG_MIN_TYPE); if (err < 0) return err; -- GitLab From 5d78b73e851455d525a064f3b042b29fdc0c1a4a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Feb 2024 08:11:12 -0800 Subject: [PATCH 1346/1405] tools: ynl: don't leak mcast_groups on init error Make sure to free the already-parsed mcast_groups if we don't get an ack from the kernel when reading family info. This is part of the ynl_sock_create() error path, so we won't get a call to ynl_sock_destroy() to free them later. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20240220161112.2735195-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 9e41c8c0cc99f..6e6d474c83664 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -588,7 +588,13 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) return err; } - return ynl_recv_ack(ys, err); + err = ynl_recv_ack(ys, err); + if (err < 0) { + free(ys->mcast_groups); + return err; + } + + return 0; } struct ynl_sock * -- GitLab From 90d07e36d40079a22ca99edd2412e7e9c2382968 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 20 Feb 2024 09:22:46 +0100 Subject: [PATCH 1347/1405] net: stmmac: Fix EST offset for dwmac 5.10 Fix EST offset for dwmac 5.10. Currently configuring Qbv doesn't work as expected. The schedule is configured, but never confirmed: |[ 128.250219] imx-dwmac 428a0000.ethernet eth1: configured EST The reason seems to be the refactoring of the EST code which set the wrong EST offset for the dwmac 5.10. After fixing this it works as before: |[ 106.359577] imx-dwmac 428a0000.ethernet eth1: configured EST |[ 128.430715] imx-dwmac 428a0000.ethernet eth1: EST: SWOL has been switched Tested on imx93. Fixes: c3f3b97238f6 ("net: stmmac: Refactor EST implementation") Signed-off-by: Kurt Kanzenbach Reviewed-by: Serge Semin Link: https://lore.kernel.org/r/20240220-stmmac_est-v1-1-c41f9ae2e7b7@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 1bd34b2a47e81..29367105df548 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -224,7 +224,7 @@ static const struct stmmac_hwif_entry { .regs = { .ptp_off = PTP_GMAC4_OFFSET, .mmc_off = MMC_GMAC4_OFFSET, - .est_off = EST_XGMAC_OFFSET, + .est_off = EST_GMAC4_OFFSET, }, .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, -- GitLab From 61c43780e9444123410cd48c2483e01d2b8f75e8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 20 Feb 2024 08:52:45 +0100 Subject: [PATCH 1348/1405] devlink: fix port dump cmd type Unlike other commands, due to a c&p error, port dump fills-up cmd with wrong value, different from port-get request cmd, port-get doit reply and port notification. Fix it by filling cmd with value DEVLINK_CMD_PORT_NEW. Skimmed through devlink userspace implementations, none of them cares about this cmd value. Only ynl, for which, this is actually a fix, as it expects doit and dumpit ops rsp_value to be the same. Omit the fixes tag, even thought this is fix, better to target this for next release. Fixes: bfcd3a466172 ("Introduce devlink infrastructure") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240220075245.75416-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 78592912f657c..4b2d46ccfe484 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { err = devlink_nl_port_fill(msg, devlink_port, - DEVLINK_CMD_NEW, + DEVLINK_CMD_PORT_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); -- GitLab From 1fde0ca3a0de7e9f917668941156959dd5e9108b Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 20 Feb 2024 08:59:28 +0000 Subject: [PATCH 1349/1405] net/sched: flower: Add lock protection when remove filter handle As IDR can't protect itself from the concurrent modification, place idr_remove() under the protection of tp->lock. Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Jianbo Liu Reviewed-by: Cosmin Ratiu Reviewed-by: Gal Pressman Reviewed-by: Jiri Pirko Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20240220085928.9161-1-jianbol@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b73d..6ee7064c82fcc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); -- GitLab From 603ead96582d85903baec2d55f021b8dac5c25d2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 19 Feb 2024 09:00:43 +0100 Subject: [PATCH 1350/1405] net: sparx5: Add spinlock for frame transmission from CPU Both registers used when doing manual injection or fdma injection are shared between all the net devices of the switch. It was noticed that when having two process which each of them trying to inject frames on different ethernet ports, that the HW started to behave strange, by sending out more frames then expected. When doing fdma injection it is required to set the frame in the DCB and then make sure that the next pointer of the last DCB is invalid. But because there is no locks for this, then easily this pointer between the DCB can be broken and then it would create a loop of DCBs. And that means that the HW will continuously transmit these frames in a loop. Until the SW will break this loop. Therefore to fix this issue, add a spin lock for when accessing the registers for manual or fdma injection. Signed-off-by: Horatiu Vultur Reviewed-by: Daniel Machon Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Link: https://lore.kernel.org/r/20240219080043.1561014-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d1f7fc8b1b71a..3c066b62e6894 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6f565c0c0c3dc..316fed5f27355 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -280,6 +280,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc2..ac7e1cffbcecf 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; -- GitLab From 56ee7db31187dc36d501622cb5f1415e88e01c2a Mon Sep 17 00:00:00 2001 From: Sandeep Dhavale Date: Wed, 21 Feb 2024 13:03:47 -0800 Subject: [PATCH 1351/1405] erofs: fix refcount on the metabuf used for inode lookup In erofs_find_target_block() when erofs_dirnamecmp() returns 0, we do not assign the target metabuf. This causes the caller erofs_namei()'s erofs_put_metabuf() at the end to be not effective leaving the refcount on the page. As the page from metabuf (buf->page) is never put, such page cannot be migrated or reclaimed. Fix it now by putting the metabuf from previous loop and assigning the current metabuf to target before returning so caller erofs_namei() can do the final put as it was intended. Fixes: 500edd095648 ("erofs: use meta buffers for inode lookup") Cc: # 5.18+ Signed-off-by: Sandeep Dhavale Reviewed-by: Gao Xiang Reviewed-by: Jingbo Xu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20240221210348.3667795-1-dhavale@google.com Signed-off-by: Gao Xiang --- fs/erofs/namei.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index d4f631d39f0fa..f0110a78acb20 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -130,24 +130,24 @@ static void *erofs_find_target_block(struct erofs_buf *target, /* string comparison without already matched prefix */ diff = erofs_dirnamecmp(name, &dname, &matched); - if (!diff) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - erofs_put_metabuf(target); - *target = buf; - candidate = de; - *_ndirents = ndirents; - } else { + if (diff < 0) { erofs_put_metabuf(&buf); - back = mid - 1; endprfx = matched; + continue; + } + + if (!IS_ERR(candidate)) + erofs_put_metabuf(target); + *target = buf; + if (!diff) { + *_ndirents = 0; + return de; } + head = mid + 1; + startprfx = matched; + candidate = de; + *_ndirents = ndirents; continue; } out: /* free if the candidate is valid */ -- GitLab From 3b2d9bc4d4acdf15a876eae2c0d83149250e85ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:13 +0200 Subject: [PATCH 1352/1405] phonet: take correct lock to peek at the RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The receive queue is protected by its embedded spin-lock, not the socket lock, so we need the former lock here (and only that one). Fixes: 107d0d9b8d9a ("Phonet: Phonet datagram transport protocol") Reported-by: Luosili Signed-off-by: Rémi Denis-Courmont Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240218081214.4806-1-remi@remlab.net Signed-off-by: Paolo Abeni --- net/phonet/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 3aa50dc7535b7..976fe250b5095 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: -- GitLab From 7d2a894d7f487dcb894df023e9d3014cf5b93fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 18 Feb 2024 10:12:14 +0200 Subject: [PATCH 1353/1405] phonet/pep: fix racy skb_queue_empty() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The receive queues are protected by their respective spin-lock, not the socket lock. This could lead to skb_peek() unexpectedly returning NULL or a pointer to an already dequeued socket buffer. Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Signed-off-by: Rémi Denis-Courmont Link: https://lore.kernel.org/r/20240218081214.4806-2-remi@remlab.net Signed-off-by: Paolo Abeni --- net/phonet/pep.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index faba31f2eff29..3dd5f52bc1b58 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); @@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - *karg = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - *karg = skb_peek(&sk->sk_receive_queue)->len; - else - *karg = 0; - release_sock(sk); + *karg = pep_first_packet_length(sk); ret = 0; break; -- GitLab From f198d933c2e4f8f89e0620fbaf1ea7eac384a0eb Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 19 Feb 2024 14:52:54 +0100 Subject: [PATCH 1354/1405] Fix write to cloned skb in ipv6_hop_ioam() ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam(). It also updates ip6_parse_tlv() to refresh the network header pointer ("nh") when returning from ipv6_hop_ioam(). Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Paolo Abeni Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- net/ipv6/exthdrs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae7924505..02e9ffb63af19 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: -- GitLab From 187bbb6968af5400e436c193765c5d845a07364f Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 19 Feb 2024 14:52:55 +0100 Subject: [PATCH 1355/1405] selftests: ioam: refactoring to align with the fix ioam6_parser uses a packet socket. After the fix to prevent writing to cloned skb's, the receiver does not see its IOAM data anymore, which makes input/forward ioam-selftests to fail. As a workaround, ioam6_parser now uses an IPv6 raw socket and leverages ancillary data to get hop-by-hop options. As a consequence, the hook is "after" the IOAM data insertion by the receiver and all tests are working again. Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/ioam6.sh | 38 ++++----- tools/testing/selftests/net/ioam6_parser.c | 95 +++++++++++----------- 2 files changed, 66 insertions(+), 67 deletions(-) diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index fe59ca3e5596b..12491850ae985 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -367,14 +367,12 @@ run_test() local desc=$2 local node_src=$3 local node_dst=$4 - local ip6_src=$5 - local ip6_dst=$6 - local if_dst=$7 - local trace_type=$8 - local ioam_ns=$9 - - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ - $trace_type $ioam_ns & + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 + + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & local spid=$! sleep 0.1 @@ -489,7 +487,7 @@ out_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -509,7 +507,7 @@ out_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -543,14 +541,14 @@ out_bits() if [ $cmd_res != 0 ] then npassed=$((npassed+1)) - log_test_passed "$descr" + log_test_passed "$descr ($1 mode)" else nfailed=$((nfailed+1)) - log_test_failed "$descr" + log_test_failed "$descr ($1 mode)" fi else run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 fi done @@ -574,7 +572,7 @@ out_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 100 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -604,7 +602,7 @@ in_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -624,7 +622,7 @@ in_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -651,7 +649,7 @@ in_bits() dev veth0 run_test "in_bit$i" "${desc//$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 done [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -679,7 +677,7 @@ in_oflag() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -703,7 +701,7 @@ in_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 80 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -731,7 +729,7 @@ fwd_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db01::2 db02::2 veth0 0xfff002 123 + db02::2 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d41901267..895e5bb5044bb 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -512,14 +511,6 @@ static int str2id(const char *tname) return -1; } -static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) -{ - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; -} - static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1; - struct in6_addr src, dst; + int fd, size, hoplen, tid, ret = 1, on = 1; struct ioam6_hdr *opt; - struct ipv6hdr *ip6h; - __u8 buffer[400], *p; - __u16 ioam_ns; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + __u8 buffer[512]; __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr; - if (argc != 7) + if (argc != 5) goto out; - tid = str2id(argv[2]); + tid = str2id(argv[1]); if (tid < 0 || !func[tid]) goto out; - if (inet_pton(AF_INET6, argv[3], &src) != 1 || - inet_pton(AF_INET6, argv[4], &dst) != 1) + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) goto out; - if (get_u32(&tr_type, argv[5], 16) || - get_u16(&ioam_ns, argv[6], 0)) + fd = socket(PF_INET6, SOCK_RAW, + !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) goto out; - fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) - goto out; + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); - if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - argv[1], strlen(argv[1]))) + iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) goto close; - recv: - size = recv(fd, buffer, sizeof(buffer), 0); + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); if (size <= 0) goto close; - ip6h = (struct ipv6hdr *)buffer; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue; - if (!ipv6_addr_equal(&ip6h->saddr, &src) || - !ipv6_addr_equal(&ip6h->daddr, &dst)) - goto recv; + ptr = (__u8 *)CMSG_DATA(cmsg); - if (ip6h->nexthdr != IPPROTO_HOPOPTS) - goto close; + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr); - p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; - p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + } - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - p += sizeof(*opt); - ret = func[tid](tid, (struct ioam6_trace_hdr *)p, - tr_type, ioam_ns); - break; + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; } - - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; } + + goto recv; close: + free(iov.iov_base); close(fd); out: return ret; -- GitLab From 3489182b11d35f1944c1245fc9c4867cf622c50f Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 20 Feb 2024 12:30:07 +0530 Subject: [PATCH 1356/1405] net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY Commit bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") extended support of the driver from the existing support for RTL8211F(D)(I)-CG PHY to the newer RTL8211F(D)(I)-VD-CG PHY. While that commit indicated that the RTL8211F_PHYCR2 register is not supported by the "VD-CG" PHY model and therefore updated the corresponding section in rtl8211f_config_init() to be invoked conditionally, the call to "genphy_soft_reset()" was left as-is, when it should have also been invoked conditionally. This is because the call to "genphy_soft_reset()" was first introduced by the commit 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock") since the RTL8211F guide indicates that a PHY reset should be issued after setting bits in the PHYCR2 register. As the PHYCR2 register is not applicable to the "VD-CG" PHY model, fix the rtl8211f_config_init() function by invoking "genphy_soft_reset()" conditionally based on the presence of the "PHYCR2" register. Fixes: bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") Signed-off-by: Siddharth Vadapalli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220070007.968762-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 894172a3e15fe..337899c69738e 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -421,9 +421,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_suspend(struct phy_device *phydev) -- GitLab From 359e54a93ab43d32ee1bff3c2f9f10cb9f6b6e79 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Tue, 20 Feb 2024 12:21:56 +0000 Subject: [PATCH 1357/1405] l2tp: pass correct message length to ip6_append_data l2tp_ip6_sendmsg needs to avoid accounting for the transport header twice when splicing more data into an already partially-occupied skbuff. To manage this, we check whether the skbuff contains data using skb_queue_empty when deciding how much data to append using ip6_append_data. However, the code which performed the calculation was incorrect: ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; ...due to C operator precedence, this ends up setting ulen to transhdrlen for messages with a non-zero length, which results in corrupted packets on the wire. Add parentheses to correct the calculation in line with the original intent. Fixes: 9d4c75800f61 ("ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()") Cc: David Howells Cc: stable@vger.kernel.org Signed-off-by: Tom Parkin Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240220122156.43131-1-tparkin@katalix.com Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index dd3153966173d..7bf14cf9ffaa9 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, -- GitLab From 8c987693dc2d292d777f1be63cb35233049ae25e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 14 Feb 2024 15:57:42 +0100 Subject: [PATCH 1358/1405] ARM: dts: renesas: rcar-gen2: Add missing #interrupt-cells to DA9063 nodes make dtbs_check W=2: arch/arm/boot/dts/renesas/r8a7790-lager.dts:444.11-458.5: Warning (interrupt_provider): /i2c-mux4/pmic@58: Missing '#interrupt-cells' in interrupt provider ... Fix this by adding the missing #interrupt-cells properties. Reported-by: Rob Herring Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/a351e503ea97fb1af68395843f513925ff1bdf26.1707922460.git.geert+renesas@glider.be --- arch/arm/boot/dts/renesas/r8a7790-lager.dts | 1 + arch/arm/boot/dts/renesas/r8a7790-stout.dts | 1 + arch/arm/boot/dts/renesas/r8a7791-koelsch.dts | 1 + arch/arm/boot/dts/renesas/r8a7791-porter.dts | 1 + arch/arm/boot/dts/renesas/r8a7792-blanche.dts | 1 + arch/arm/boot/dts/renesas/r8a7793-gose.dts | 1 + arch/arm/boot/dts/renesas/r8a7794-alt.dts | 1 + arch/arm/boot/dts/renesas/r8a7794-silk.dts | 1 + 8 files changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/renesas/r8a7790-lager.dts b/arch/arm/boot/dts/renesas/r8a7790-lager.dts index 2fba4d084001b..8590981245a62 100644 --- a/arch/arm/boot/dts/renesas/r8a7790-lager.dts +++ b/arch/arm/boot/dts/renesas/r8a7790-lager.dts @@ -447,6 +447,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7790-stout.dts b/arch/arm/boot/dts/renesas/r8a7790-stout.dts index f9bc5b4f019d0..683f7395fab0b 100644 --- a/arch/arm/boot/dts/renesas/r8a7790-stout.dts +++ b/arch/arm/boot/dts/renesas/r8a7790-stout.dts @@ -347,6 +347,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; diff --git a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts index e9c13bb03772a..0efd9f98c75ac 100644 --- a/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/renesas/r8a7791-koelsch.dts @@ -819,6 +819,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7791-porter.dts b/arch/arm/boot/dts/renesas/r8a7791-porter.dts index 7e8bc06715f65..93c86e9216455 100644 --- a/arch/arm/boot/dts/renesas/r8a7791-porter.dts +++ b/arch/arm/boot/dts/renesas/r8a7791-porter.dts @@ -413,6 +413,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; watchdog { compatible = "dlg,da9063-watchdog"; diff --git a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts index 4f9838cf97ee4..540a9ad28f28a 100644 --- a/arch/arm/boot/dts/renesas/r8a7792-blanche.dts +++ b/arch/arm/boot/dts/renesas/r8a7792-blanche.dts @@ -381,6 +381,7 @@ interrupt-parent = <&irqc>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7793-gose.dts b/arch/arm/boot/dts/renesas/r8a7793-gose.dts index 1744fdbf9e0ce..1ea6c757893bc 100644 --- a/arch/arm/boot/dts/renesas/r8a7793-gose.dts +++ b/arch/arm/boot/dts/renesas/r8a7793-gose.dts @@ -759,6 +759,7 @@ interrupt-parent = <&irqc0>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7794-alt.dts b/arch/arm/boot/dts/renesas/r8a7794-alt.dts index c0d067df22a03..b5ecafbb2e4de 100644 --- a/arch/arm/boot/dts/renesas/r8a7794-alt.dts +++ b/arch/arm/boot/dts/renesas/r8a7794-alt.dts @@ -453,6 +453,7 @@ interrupt-parent = <&gpio3>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; rtc { compatible = "dlg,da9063-rtc"; diff --git a/arch/arm/boot/dts/renesas/r8a7794-silk.dts b/arch/arm/boot/dts/renesas/r8a7794-silk.dts index 43d480a7f3eac..595e074085eb4 100644 --- a/arch/arm/boot/dts/renesas/r8a7794-silk.dts +++ b/arch/arm/boot/dts/renesas/r8a7794-silk.dts @@ -439,6 +439,7 @@ interrupt-parent = <&gpio3>; interrupts = <31 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + #interrupt-cells = <2>; onkey { compatible = "dlg,da9063-onkey"; -- GitLab From 40510a941d27d405a82dc3320823d875f94625df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 21 Feb 2024 08:33:24 +0100 Subject: [PATCH 1359/1405] drm/ttm: Fix an invalid freeing on already freed page in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If caching mode change fails due to, for example, OOM we free the allocated pages in a two-step process. First the pages for which the caching change has already succeeded. Secondly the pages for which a caching change did not succeed. However the second step was incorrectly freeing the pages already freed in the first step. Fix. Signed-off-by: Thomas Hellström Fixes: 379989e7cbdc ("drm/ttm/pool: Fix ttm_pool_alloc error path") Cc: Christian König Cc: Dave Airlie Cc: Christian Koenig Cc: Huang Rui Cc: dri-devel@lists.freedesktop.org Cc: # v6.4+ Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240221073324.3303-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index b62f420a9f969..112438d965ffb 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -387,7 +387,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, enum ttm_caching caching, pgoff_t start_page, pgoff_t end_page) { - struct page **pages = tt->pages; + struct page **pages = &tt->pages[start_page]; unsigned int order; pgoff_t i, nr; -- GitLab From 3c43177ffb54ea5be97505eb8e2690e99ac96bc9 Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Fri, 19 Jan 2024 08:32:06 -0800 Subject: [PATCH 1360/1405] drm/syncobj: call drm_syncobj_fence_add_wait when WAIT_AVAILABLE flag is set When waiting for a syncobj timeline point whose fence has not yet been submitted with the WAIT_FOR_SUBMIT flag, a callback is registered using drm_syncobj_fence_add_wait and the thread is put to sleep until the timeout expires. If the fence is submitted before then, drm_syncobj_add_point will wake up the sleeping thread immediately which will proceed to wait for the fence to be signaled. However, if the WAIT_AVAILABLE flag is used instead, drm_syncobj_fence_add_wait won't get called, meaning the waiting thread will always sleep for the full timeout duration, even if the fence gets submitted earlier. If it turns out that the fence *has* been submitted by the time it eventually wakes up, it will still indicate to userspace that the wait completed successfully (it won't return -ETIME), but it will have taken much longer than it should have. To fix this, we must call drm_syncobj_fence_add_wait if *either* the WAIT_FOR_SUBMIT flag or the WAIT_AVAILABLE flag is set. The only difference being that with WAIT_FOR_SUBMIT we will also wait for the fence to be signaled after it has been submitted while with WAIT_AVAILABLE we will return immediately. IGT test patch: https://lists.freedesktop.org/archives/igt-dev/2024-January/067537.html v1 -> v2: adjust lockdep_assert_none_held_once condition (cherry picked from commit 8c44ea81634a4a337df70a32621a5f3791be23df) Fixes: 01d6c3578379 ("drm/syncobj: add support for timeline point wait v8") Signed-off-by: Erik Kurzinger Signed-off-by: Simon Ser Reviewed-by: Daniel Vetter Reviewed-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240119163208.3723457-1-ekurzinger@nvidia.com --- drivers/gpu/drm/drm_syncobj.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 84101baeecc6e..56b0677acfa3d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1040,7 +1040,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, uint64_t *points; uint32_t signaled_count, i; - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) lockdep_assert_none_held_once(); points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); @@ -1109,7 +1110,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, * fallthough and try a 0 timeout wait! */ - if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { + if (flags & (DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) { for (i = 0; i < count; ++i) drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]); } -- GitLab From b5bf7778b722105d7a04b1d51e884497b542638b Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 21 Feb 2024 20:27:02 -0400 Subject: [PATCH 1361/1405] iommu/arm-smmu-v3: Do not use GFP_KERNEL under as spinlock If the SMMU is configured to use a two level CD table then arm_smmu_write_ctx_desc() allocates a CD table leaf internally using GFP_KERNEL. Due to recent changes this is being done under a spinlock to iterate over the device list - thus it will trigger a sleeping while atomic warning: arm_smmu_sva_set_dev_pasid() mutex_lock(&sva_lock); __arm_smmu_sva_bind() arm_smmu_mmu_notifier_get() spin_lock_irqsave() arm_smmu_write_ctx_desc() arm_smmu_get_cd_ptr() arm_smmu_alloc_cd_leaf_table() dmam_alloc_coherent(GFP_KERNEL) This is a 64K high order allocation and really should not be done atomically. At the moment the rework of the SVA to follow the new API is half finished. Recently the CD table memory was moved from the domain to the master, however we have the confusing situation where the SVA code is wrongly using the RID domains device's list to track which CD tables the SVA is installed in. Remove the logic to replicate the CD across all the domain's masters during attach. We know which master and which CD table the PASID should be installed in. Right now SVA only works when dma-iommu.c is in control of the RID translation, which means we have a single iommu_domain shared across the entire group and that iommu_domain is not shared outside the group. Critically this means that the iommu_group->devices list and RID's smmu_domain->devices list describe the same set of masters. For PCI cases the core code also insists on singleton groups so there is only one entry in the smmu_domain->devices list that is equal to the master being passed in to arm_smmu_sva_set_dev_pasid(). Only non-PCI cases may have multi-device groups. However, the core code will repeat the calls to arm_smmu_sva_set_dev_pasid() across the entire iommu_group->devices list. Instead of having arm_smmu_mmu_notifier_get() indirectly loop over all the devices in the group via the RID's smmu_domain, rely on __arm_smmu_sva_bind() to be called for each device in the group and install the repeated CD entry that way. This avoids taking the spinlock to access the devices list and permits the arm_smmu_write_ctx_desc() to use a sleeping allocation. Leave the arm_smmu_mm_release() as a confusing situation, this requires tracking attached masters inside the SVA domain. Removing the loop allows arm_smmu_write_ctx_desc() to be called outside the spinlock and thus is safe to use GFP_KERNEL. Move the clearing of the CD into arm_smmu_sva_remove_dev_pasid() so that arm_smmu_mmu_notifier_get/put() remain paired functions. Fixes: 24503148c545 ("iommu/arm-smmu-v3: Refactor write_ctx_desc") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/4e25d161-0cf8-4050-9aa3-dfa21cd63e56@moroto.mountain/ Signed-off-by: Jason Gunthorpe Reviewed-by: Michael Shavit Link: https://lore.kernel.org/r/0-v3-11978fc67151+112-smmu_cd_atomic_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 38 ++++++------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 05722121f00e7..4a27fbdb2d844 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -292,10 +292,8 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, struct mm_struct *mm) { int ret; - unsigned long flags; struct arm_smmu_ctx_desc *cd; struct arm_smmu_mmu_notifier *smmu_mn; - struct arm_smmu_master *master; list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { if (smmu_mn->mn.mm == mm) { @@ -325,28 +323,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, goto err_free_cd; } - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm), - cd); - if (ret) { - list_for_each_entry_from_reverse( - master, &smmu_domain->devices, domain_head) - arm_smmu_write_ctx_desc( - master, mm_get_enqcmd_pasid(mm), NULL); - break; - } - } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - if (ret) - goto err_put_notifier; - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); return smmu_mn; -err_put_notifier: - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); err_free_cd: arm_smmu_free_shared_cd(cd); return ERR_PTR(ret); @@ -363,9 +342,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) list_del(&smmu_mn->list); - arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm), - NULL); - /* * If we went through clear(), we've already invalidated, and no * new TLB entry can have been formed. @@ -381,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) +static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, + struct mm_struct *mm) { int ret; struct arm_smmu_bond *bond; @@ -404,9 +381,15 @@ static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) goto err_free_bond; } + ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd); + if (ret) + goto err_put_notifier; + list_add(&bond->list, &master->bonds); return 0; +err_put_notifier: + arm_smmu_mmu_notifier_put(bond->smmu_mn); err_free_bond: kfree(bond); return ret; @@ -568,6 +551,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); mutex_lock(&sva_lock); + + arm_smmu_write_ctx_desc(master, id, NULL); + list_for_each_entry(t, &master->bonds, list) { if (t->mm == mm) { bond = t; @@ -590,7 +576,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct mm_struct *mm = domain->mm; mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, mm); + ret = __arm_smmu_sva_bind(dev, id, mm); mutex_unlock(&sva_lock); return ret; -- GitLab From 2aa6f5b0fd052e363bb9d4b547189f0bf6b3d6d3 Mon Sep 17 00:00:00 2001 From: Erik Kurzinger Date: Wed, 21 Feb 2024 10:44:28 -0800 Subject: [PATCH 1362/1405] drm/syncobj: handle NULL fence in syncobj_eventfd_entry_func During syncobj_eventfd_entry_func, dma_fence_chain_find_seqno may set the fence to NULL if the given seqno is signaled and a later seqno has already been submitted. In that case, the eventfd should be signaled immediately which currently does not happen. This is a similar issue to the one addressed by commit b19926d4f3a6 ("drm/syncobj: Deal with signalled fences in drm_syncobj_find_fence."). As a fix, if the return value of dma_fence_chain_find_seqno indicates success but it sets the fence to NULL, we will assign a stub fence to ensure the following code still signals the eventfd. v1 -> v2: assign a stub fence instead of signaling the eventfd Signed-off-by: Erik Kurzinger Fixes: c7a472297169 ("drm/syncobj: add IOCTL to register an eventfd") Signed-off-by: Simon Ser Link: https://patchwork.freedesktop.org/patch/msgid/20240221184527.37667-1-ekurzinger@nvidia.com --- drivers/gpu/drm/drm_syncobj.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 56b0677acfa3d..a6c19de462928 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1418,10 +1418,21 @@ syncobj_eventfd_entry_func(struct drm_syncobj *syncobj, /* This happens inside the syncobj lock */ fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + if (!fence) + return; + ret = dma_fence_chain_find_seqno(&fence, entry->point); - if (ret != 0 || !fence) { + if (ret != 0) { + /* The given seqno has not been submitted yet. */ dma_fence_put(fence); return; + } else if (!fence) { + /* If dma_fence_chain_find_seqno returns 0 but sets the fence + * to NULL, it implies that the given seqno is signaled and a + * later seqno has already been submitted. Assign a stub fence + * so that the eventfd still gets signaled below. + */ + fence = dma_fence_get_stub(); } list_del_init(&entry->node); -- GitLab From 510325e5ac5f45c1180189d3bfc108c54bf64544 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 22 Feb 2024 12:49:33 +0500 Subject: [PATCH 1363/1405] selftests/iommu: fix the config fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The config fragment doesn't follow the correct format to enable those config options which make the config options getting missed while merging with other configs. ➜ merge_config.sh -m .config tools/testing/selftests/iommu/config Using .config as base Merging tools/testing/selftests/iommu/config ➜ make olddefconfig .config:5295:warning: unexpected data: CONFIG_IOMMUFD .config:5296:warning: unexpected data: CONFIG_IOMMUFD_TEST While at it, add CONFIG_FAULT_INJECTION as well which is needed for CONFIG_IOMMUFD_TEST. If CONFIG_FAULT_INJECTION isn't present in base config (such as x86 defconfig), CONFIG_IOMMUFD_TEST doesn't get enabled. Fixes: 57f0988706fe ("iommufd: Add a selftest") Link: https://lore.kernel.org/r/20240222074934.71380-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/config | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config index 6c4f901d6fed3..110d73917615d 100644 --- a/tools/testing/selftests/iommu/config +++ b/tools/testing/selftests/iommu/config @@ -1,2 +1,3 @@ -CONFIG_IOMMUFD -CONFIG_IOMMUFD_TEST +CONFIG_IOMMUFD=y +CONFIG_FAULT_INJECTION=y +CONFIG_IOMMUFD_TEST=y -- GitLab From 5ef1dc40ffa6a6cb968b0fdc43c3a61727a9e950 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 14 Feb 2024 16:06:28 +0100 Subject: [PATCH 1364/1405] s390/cio: fix invalid -EBUSY on ccw_device_start The s390 common I/O layer (CIO) returns an unexpected -EBUSY return code when drivers try to start I/O while a path-verification (PV) process is pending. This can lead to failed device initialization attempts with symptoms like broken network connectivity after boot. Fix this by replacing the -EBUSY return code with a deferred condition code 1 reply to make path-verification handling consistent from a driver's point of view. The problem can be reproduced semi-regularly using the following process, while repeating steps 2-3 as necessary (example assumes an OSA device with bus-IDs 0.0.a000-0.0.a002 on CHPID 0.02): 1. echo 0.0.a000,0.0.a001,0.0.a002 >/sys/bus/ccwgroup/drivers/qeth/group 2. echo 0 > /sys/bus/ccwgroup/devices/0.0.a000/online 3. echo 1 > /sys/bus/ccwgroup/devices/0.0.a000/online ; \ echo on > /sys/devices/css0/chp0.02/status Background information: The common I/O layer starts path-verification I/Os when it receives indications about changes in a device path's availability. This occurs for example when hardware events indicate a change in channel-path status, or when a manual operation such as a CHPID vary or configure operation is performed. If a driver attempts to start I/O while a PV is running, CIO reports a successful I/O start (ccw_device_start() return code 0). Then, after completion of PV, CIO synthesizes an interrupt response that indicates an asynchronous status condition that prevented the start of the I/O (deferred condition code 1). If a PV indication arrives while a device is busy with driver-owned I/O, PV is delayed until after I/O completion was reported to the driver's interrupt handler. To ensure that PV can be started eventually, CIO reports a device busy condition (ccw_device_start() return code -EBUSY) if a driver tries to start another I/O while PV is pending. In some cases this -EBUSY return code causes device drivers to consider a device not operational, resulting in failed device initialization. Note: The code that introduced the problem was added in 2003. Symptoms started appearing with the following CIO commit that causes a PV indication when a device is removed from the cio_ignore list after the associated parent subchannel device was probed, but before online processing of the CCW device has started: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") During boot, the cio_ignore list is modified by the cio_ignore dracut module [1] as well as Linux vendor-specific systemd service scripts[2]. When combined, this commit and boot scripts cause a frequent occurrence of the problem during boot. [1] https://github.com/dracutdevs/dracut/tree/master/modules.d/81cio_ignore [2] https://github.com/SUSE/s390-tools/blob/master/cio_ignore.service Cc: stable@vger.kernel.org # v5.15+ Fixes: 2297791c92d0 ("s390/cio: dont unregister subchannel from child-drivers") Tested-By: Thorsten Winkler Reviewed-by: Thorsten Winkler Signed-off-by: Peter Oberparleiter Signed-off-by: Heiko Carstens --- drivers/s390/cio/device_ops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index c533d1dadc6bb..a5dba3829769c 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -202,7 +202,8 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return -EINVAL; if (cdev->private->state == DEV_STATE_NOT_OPER) return -ENODEV; - if (cdev->private->state == DEV_STATE_VERIFY) { + if (cdev->private->state == DEV_STATE_VERIFY || + cdev->private->flags.doverify) { /* Remember to fake irb when finished. */ if (!cdev->private->flags.fake_irb) { cdev->private->flags.fake_irb = FAKE_CMD_IRB; @@ -214,8 +215,7 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, } if (cdev->private->state != DEV_STATE_ONLINE || ((sch->schib.scsw.cmd.stctl & SCSW_STCTL_PRIM_STATUS) && - !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS)) || - cdev->private->flags.doverify) + !(sch->schib.scsw.cmd.stctl & SCSW_STCTL_SEC_STATUS))) return -EBUSY; ret = cio_set_options (sch, flags); if (ret) -- GitLab From 22e1dc4b2fec17af70f297a4295c5f19a0f3fbeb Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 2 Feb 2024 17:34:11 +0800 Subject: [PATCH 1365/1405] drm/amd/display: adjust few initialization order in dm [Why] Observe error message "Can't retrieve aconnector in hpd_rx_irq_offload_work" when boot up with a mst tbt4 dock connected. After analyzing, there are few parts needed to be adjusted: 1. hpd_rx_offload_wq[].aconnector is not initialzed before the dmub outbox hpd_irq handler get registered which causes the error message. 2. registeration of hpd and hpd_rx_irq event for usb4 dp tunneling is not aligned with legacy interface sequence [How] Put DMUB_NOTIFICATION_HPD and DMUB_NOTIFICATION_HPD_IRQ handler registration into register_hpd_handlers() to align other interfaces and get hpd_rx_offload_wq[].aconnector initialized earlier than that. Leave DMUB_NOTIFICATION_AUX_REPLY registered as it was since we need that while calling dc_link_detect(). USB4 connection status will be proactively detected by dc_link_detect_connection_type() in amdgpu_dm_initialize_drm_device() Cc: Stable Reviewed-by: Aurabindo Pillai Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index cf875751971fe..377d532ca7618 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1843,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_ERROR("amdgpu: fail to register dmub aux callback"); goto error; } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) { - DRM_ERROR("amdgpu: fail to register dmub hpd callback"); - goto error; - } - } - - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. - * It is expected that DMUB will resend any pending notifications at this point, for - * example HPD from DPIA. - */ - if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. + * It is expected that DMUB will resend any pending notifications at this point. Note + * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to + * align legacy interface initialization sequence. Connection status will be proactivly + * detected once in the amdgpu_dm_initialize_drm_device. + */ dc_enable_dmub_outbox(adev->dm.dc); /* DPIA trace goes to dmesg logs only if outbox is enabled */ @@ -3536,6 +3527,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev) int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT; int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT; + if (dc_is_dmub_outbox_supported(adev->dm.dc)) { + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + + if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) + DRM_ERROR("amdgpu: fail to register dmub hpd callback"); + } + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { @@ -3564,10 +3563,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev) handle_hpd_rx_irq, (void *) aconnector); } - - if (adev->dm.hpd_rx_offload_wq) - adev->dm.hpd_rx_offload_wq[connector->index].aconnector = - aconnector; } } @@ -4561,6 +4556,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) goto fail; } + if (dm->hpd_rx_offload_wq) + dm->hpd_rx_offload_wq[aconnector->base.index].aconnector = + aconnector; + if (!dc_link_detect_connection_type(link, &new_connection_type)) DRM_ERROR("KMS: Failed to detect connector\n"); -- GitLab From 4e73826089ce899357580bbf6e0afe4e6f9900b7 Mon Sep 17 00:00:00 2001 From: Lewis Huang Date: Wed, 31 Jan 2024 17:20:17 +0800 Subject: [PATCH 1366/1405] drm/amd/display: Only allow dig mapping to pwrseq in new asic [Why] The old asic only have 1 pwrseq hw. We don't need to map the diginst to pwrseq inst in old asic. [How] 1. Only mapping dig to pwrseq for new asic. 2. Move mapping function into dcn specific panel control component Cc: Stable # v6.6+ Cc: Mario Limonciello Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3122 Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Lewis Huang Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dce/dce_panel_cntl.c | 1 + .../amd/display/dc/dcn301/dcn301_panel_cntl.c | 1 + .../amd/display/dc/dcn31/dcn31_panel_cntl.c | 18 ++++++++++++- .../drm/amd/display/dc/inc/hw/panel_cntl.h | 2 +- .../drm/amd/display/dc/link/link_factory.c | 26 +------------------ 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c index e8570060d007b..5bca67407c5b1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_panel_cntl.c @@ -290,4 +290,5 @@ void dce_panel_cntl_construct( dce_panel_cntl->base.funcs = &dce_link_panel_cntl_funcs; dce_panel_cntl->base.ctx = init_data->ctx; dce_panel_cntl->base.inst = init_data->inst; + dce_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c index ad0df1a72a90a..9e96a3ace2077 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_panel_cntl.c @@ -215,4 +215,5 @@ void dcn301_panel_cntl_construct( dcn301_panel_cntl->base.funcs = &dcn301_link_panel_cntl_funcs; dcn301_panel_cntl->base.ctx = init_data->ctx; dcn301_panel_cntl->base.inst = init_data->inst; + dcn301_panel_cntl->base.pwrseq_inst = 0; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 03248422d6ffd..281be20b1a107 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -154,8 +154,24 @@ void dcn31_panel_cntl_construct( struct dcn31_panel_cntl *dcn31_panel_cntl, const struct panel_cntl_init_data *init_data) { + uint8_t pwrseq_inst = 0xF; + dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; - dcn31_panel_cntl->base.pwrseq_inst = init_data->pwrseq_inst; + + switch (init_data->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); + ASSERT(false); + break; + } + + dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h index 5dcbaa2db964a..e97d964a1791c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/panel_cntl.h @@ -57,7 +57,7 @@ struct panel_cntl_funcs { struct panel_cntl_init_data { struct dc_context *ctx; uint32_t inst; - uint32_t pwrseq_inst; + uint32_t eng_id; }; struct panel_cntl { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 37d3027c32dcb..cf22b8f28ba6c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -370,30 +370,6 @@ static enum transmitter translate_encoder_to_transmitter( } } -static uint8_t translate_dig_inst_to_pwrseq_inst(struct dc_link *link) -{ - uint8_t pwrseq_inst = 0xF; - struct dc_context *dc_ctx = link->dc->ctx; - - DC_LOGGER_INIT(dc_ctx->logger); - - switch (link->eng_id) { - case ENGINE_ID_DIGA: - pwrseq_inst = 0; - break; - case ENGINE_ID_DIGB: - pwrseq_inst = 1; - break; - default: - DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", link->eng_id); - ASSERT(false); - break; - } - - return pwrseq_inst; -} - - static void link_destruct(struct dc_link *link) { int i; @@ -657,7 +633,7 @@ static bool construct_phy(struct dc_link *link, link->link_id.id == CONNECTOR_ID_LVDS)) { panel_cntl_init_data.ctx = dc_ctx; panel_cntl_init_data.inst = panel_cntl_init_data.ctx->dc_edp_id_count; - panel_cntl_init_data.pwrseq_inst = translate_dig_inst_to_pwrseq_inst(link); + panel_cntl_init_data.eng_id = link->eng_id; link->panel_cntl = link->dc->res_pool->funcs->panel_cntl_create( &panel_cntl_init_data); -- GitLab From d2b48f340d9e4a8fbeb1cdc84cd8da6ad143a907 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 19 Feb 2024 11:43:16 +0530 Subject: [PATCH 1367/1405] drm/amd/display: Fix potential null pointer dereference in dc_dmub_srv Fixes potential null pointer dereference warnings in the dc_dmub_srv_cmd_list_queue_execute() and dc_dmub_srv_is_hw_pwr_up() functions. In both functions, the 'dc_dmub_srv' variable was being dereferenced before it was checked for null. This could lead to a null pointer dereference if 'dc_dmub_srv' is null. The fix is to check if 'dc_dmub_srv' is null before dereferencing it. Thus moving the null checks for 'dc_dmub_srv' to the beginning of the functions to ensure that 'dc_dmub_srv' is not null when it is dereferenced. Found by smatch & thus fixing the below: drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:133 dc_dmub_srv_cmd_list_queue_execute() warn: variable dereferenced before check 'dc_dmub_srv' (see line 128) drivers/gpu/drm/amd/amdgpu/../display/dc/dc_dmub_srv.c:1167 dc_dmub_srv_is_hw_pwr_up() warn: variable dereferenced before check 'dc_dmub_srv' (see line 1164) Fixes: 028bac583449 ("drm/amd/display: decouple dmcub execution to reduce lock granularity") Fixes: 65138eb72e1f ("drm/amd/display: Add DCN35 DMUB") Cc: JinZe.Xu Cc: Hersen Wu Cc: Josip Pavic Cc: Roman Li Cc: Qingqing Zhuo Cc: Harry Wentland Cc: Rodrigo Siqueira Cc: Aurabindo Pillai Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Reviewed-by: Tom Chung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 2b79a0e5638e1..363d522603a21 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -125,7 +125,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; struct dmub_srv *dmub; enum dmub_status status; int i; @@ -133,6 +133,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; + dc_ctx = dc_dmub_srv->ctx; dmub = dc_dmub_srv->dmub; for (i = 0 ; i < count; i++) { @@ -1161,7 +1162,7 @@ void dc_dmub_srv_subvp_save_surf_addr(const struct dc_dmub_srv *dc_dmub_srv, con bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) { - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dc_context *dc_ctx; enum dmub_status status; if (!dc_dmub_srv || !dc_dmub_srv->dmub) @@ -1170,6 +1171,8 @@ bool dc_dmub_srv_is_hw_pwr_up(struct dc_dmub_srv *dc_dmub_srv, bool wait) if (dc_dmub_srv->ctx->dc->debug.dmcub_emulation) return true; + dc_ctx = dc_dmub_srv->ctx; + if (wait) { if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { do { -- GitLab From 27a6c49394b1a203beeb94752c9a1d6318f24ddf Mon Sep 17 00:00:00 2001 From: Swapnil Patel Date: Tue, 6 Feb 2024 11:40:20 -0500 Subject: [PATCH 1368/1405] drm/amd/display: fix input states translation error for dcn35 & dcn351 [Why] Currently there is an error while translating input clock sates into output clock states. The highest fclk setting from output sates is being dropped because of this error. [How] For dcn35 and dcn351, make output_states equal to input states. Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Swapnil Patel Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml2_translation_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 23a608274096f..1ba6933d2b361 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -398,7 +398,6 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, /* Copy clocks tables entries, if available */ if (dml2->config.bbox_overrides.clks_table.num_states) { p->in_states->num_states = dml2->config.bbox_overrides.clks_table.num_states; - for (i = 0; i < dml2->config.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels; i++) { p->in_states->state_array[i].dcfclk_mhz = dml2->config.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz; } @@ -437,6 +436,14 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } dml2_policy_build_synthetic_soc_states(s, p); + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + // Override last out_state with data from last in_state + // This will ensure that out_state contains max fclk + memcpy(&p->out_states->state_array[p->out_states->num_states - 1], + &p->in_states->state_array[p->in_states->num_states - 1], + sizeof(struct soc_state_bounding_box_st)); + } } void dml2_translate_ip_params(const struct dc *in, struct ip_params_st *out) -- GitLab From bae67893578d608e35691dcdfa90c4957debf1d3 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 13 Feb 2024 01:50:50 +0100 Subject: [PATCH 1369/1405] drm/amd/display: Fix memory leak in dm_sw_fini() After destroying dmub_srv, the memory associated with it is not freed, causing a memory leak: unreferenced object 0xffff896302b45800 (size 1024): comm "(udev-worker)", pid 222, jiffies 4294894636 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 6265fd77): [] kmalloc_trace+0x29d/0x340 [] dm_dmub_sw_init+0xb4/0x450 [amdgpu] [] dm_sw_init+0x15/0x2b0 [amdgpu] [] amdgpu_device_init+0x1417/0x24e0 [amdgpu] [] amdgpu_driver_load_kms+0x15/0x190 [amdgpu] [] amdgpu_pci_probe+0x187/0x4e0 [amdgpu] [] local_pci_probe+0x3e/0x90 [] pci_device_probe+0xc3/0x230 [] really_probe+0xe2/0x480 [] __driver_probe_device+0x78/0x160 [] driver_probe_device+0x1f/0x90 [] __driver_attach+0xce/0x1c0 [] bus_for_each_dev+0x70/0xc0 [] bus_add_driver+0x112/0x210 [] driver_register+0x55/0x100 [] do_one_initcall+0x41/0x300 Fix this by freeing dmub_srv after destroying it. Fixes: 743b9786b14a ("drm/amd/display: Hook up the DMUB service in DM") Signed-off-by: Armin Wolf Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 377d532ca7618..ef27dd71cbbc5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2278,6 +2278,7 @@ static int dm_sw_fini(void *handle) if (adev->dm.dmub_srv) { dmub_srv_destroy(adev->dm.dmub_srv); + kfree(adev->dm.dmub_srv); adev->dm.dmub_srv = NULL; } -- GitLab From 9671761792156f2339627918bafcd713a8a6f777 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Fri, 16 Feb 2024 09:23:19 -0300 Subject: [PATCH 1370/1405] drm/amd/display: fix null-pointer dereference on edid reading Use i2c adapter when there isn't aux_mode in dc_link to fix a null-pointer derefence that happens when running igt@kms_force_connector_basic in a system with DCN2.1 and HDMI connector detected as below: [ +0.178146] BUG: kernel NULL pointer dereference, address: 00000000000004c0 [ +0.000010] #PF: supervisor read access in kernel mode [ +0.000005] #PF: error_code(0x0000) - not-present page [ +0.000004] PGD 0 P4D 0 [ +0.000006] Oops: 0000 [#1] PREEMPT SMP NOPTI [ +0.000006] CPU: 15 PID: 2368 Comm: kms_force_conne Not tainted 6.5.0-asdn+ #152 [ +0.000005] Hardware name: HP HP ENVY x360 Convertible 13-ay1xxx/8929, BIOS F.01 07/14/2021 [ +0.000004] RIP: 0010:i2c_transfer+0xd/0x100 [ +0.000011] Code: ea fc ff ff 66 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 41 54 55 53 <48> 8b 47 10 48 89 fb 48 83 38 00 0f 84 b3 00 00 00 83 3d 2f 80 16 [ +0.000004] RSP: 0018:ffff9c4f89c0fad0 EFLAGS: 00010246 [ +0.000005] RAX: 0000000000000000 RBX: 0000000000000005 RCX: 0000000000000080 [ +0.000003] RDX: 0000000000000002 RSI: ffff9c4f89c0fb20 RDI: 00000000000004b0 [ +0.000003] RBP: ffff9c4f89c0fb80 R08: 0000000000000080 R09: ffff8d8e0b15b980 [ +0.000003] R10: 00000000000380e0 R11: 0000000000000000 R12: 0000000000000080 [ +0.000002] R13: 0000000000000002 R14: ffff9c4f89c0fb0e R15: ffff9c4f89c0fb0f [ +0.000004] FS: 00007f9ad2176c40(0000) GS:ffff8d90fe9c0000(0000) knlGS:0000000000000000 [ +0.000003] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.000004] CR2: 00000000000004c0 CR3: 0000000121bc4000 CR4: 0000000000750ee0 [ +0.000003] PKRU: 55555554 [ +0.000003] Call Trace: [ +0.000006] [ +0.000006] ? __die+0x23/0x70 [ +0.000011] ? page_fault_oops+0x17d/0x4c0 [ +0.000008] ? preempt_count_add+0x6e/0xa0 [ +0.000008] ? srso_alias_return_thunk+0x5/0x7f [ +0.000011] ? exc_page_fault+0x7f/0x180 [ +0.000009] ? asm_exc_page_fault+0x26/0x30 [ +0.000013] ? i2c_transfer+0xd/0x100 [ +0.000010] drm_do_probe_ddc_edid+0xc2/0x140 [drm] [ +0.000067] ? srso_alias_return_thunk+0x5/0x7f [ +0.000006] ? _drm_do_get_edid+0x97/0x3c0 [drm] [ +0.000043] ? __pfx_drm_do_probe_ddc_edid+0x10/0x10 [drm] [ +0.000042] edid_block_read+0x3b/0xd0 [drm] [ +0.000043] _drm_do_get_edid+0xb6/0x3c0 [drm] [ +0.000041] ? __pfx_drm_do_probe_ddc_edid+0x10/0x10 [drm] [ +0.000043] drm_edid_read_custom+0x37/0xd0 [drm] [ +0.000044] amdgpu_dm_connector_mode_valid+0x129/0x1d0 [amdgpu] [ +0.000153] drm_connector_mode_valid+0x3b/0x60 [drm_kms_helper] [ +0.000000] __drm_helper_update_and_validate+0xfe/0x3c0 [drm_kms_helper] [ +0.000000] ? amdgpu_dm_connector_get_modes+0xb6/0x520 [amdgpu] [ +0.000000] ? srso_alias_return_thunk+0x5/0x7f [ +0.000000] drm_helper_probe_single_connector_modes+0x2ab/0x540 [drm_kms_helper] [ +0.000000] status_store+0xb2/0x1f0 [drm] [ +0.000000] kernfs_fop_write_iter+0x136/0x1d0 [ +0.000000] vfs_write+0x24d/0x440 [ +0.000000] ksys_write+0x6f/0xf0 [ +0.000000] do_syscall_64+0x60/0xc0 [ +0.000000] ? srso_alias_return_thunk+0x5/0x7f [ +0.000000] ? syscall_exit_to_user_mode+0x2b/0x40 [ +0.000000] ? srso_alias_return_thunk+0x5/0x7f [ +0.000000] ? do_syscall_64+0x6c/0xc0 [ +0.000000] ? do_syscall_64+0x6c/0xc0 [ +0.000000] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ +0.000000] RIP: 0033:0x7f9ad46b4b00 [ +0.000000] Code: 40 00 48 8b 15 19 b3 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 80 3d e1 3a 0e 00 00 74 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 48 89 [ +0.000000] RSP: 002b:00007ffcbd3bd6d8 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 [ +0.000000] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9ad46b4b00 [ +0.000000] RDX: 0000000000000002 RSI: 00007f9ad48a7417 RDI: 0000000000000009 [ +0.000000] RBP: 0000000000000002 R08: 0000000000000064 R09: 0000000000000000 [ +0.000000] R10: 0000000000000000 R11: 0000000000000202 R12: 00007f9ad48a7417 [ +0.000000] R13: 0000000000000009 R14: 00007ffcbd3bd760 R15: 0000000000000001 [ +0.000000] [ +0.000000] Modules linked in: ctr ccm rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device cmac algif_hash algif_skcipher af_alg bnep btusb btrtl btbcm btintel btmtk bluetooth uvcvideo videobuf2_vmalloc sha3_generic videobuf2_memops uvc jitterentropy_rng videobuf2_v4l2 videodev drbg videobuf2_common ansi_cprng mc ecdh_generic ecc qrtr binfmt_misc hid_sensor_accel_3d hid_sensor_magn_3d hid_sensor_gyro_3d hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio snd_ctl_led joydev hid_sensor_iio_common rtw89_8852ae rtw89_8852a rtw89_pci snd_hda_codec_realtek rtw89_core snd_hda_codec_generic intel_rapl_msr ledtrig_audio intel_rapl_common snd_hda_codec_hdmi mac80211 snd_hda_intel snd_intel_dspcfg kvm_amd snd_hda_codec snd_soc_dmic snd_acp3x_rn snd_acp3x_pdm_dma libarc4 snd_hwdep snd_soc_core kvm snd_hda_core cfg80211 snd_pci_acp6x snd_pcm nls_ascii snd_timer hp_wmi snd_pci_acp5x nls_cp437 snd_rn_pci_acp3x ucsi_acpi sparse_keymap ccp snd platform_profile snd_acp_config typec_ucsi irqbypass vfat sp5100_tco [ +0.000000] snd_soc_acpi fat rapl pcspkr wmi_bmof roles rfkill rng_core snd_pci_acp3x soundcore k10temp watchdog typec battery ac amd_pmc acpi_tad button hid_sensor_hub hid_multitouch evdev serio_raw msr parport_pc ppdev lp parport fuse loop efi_pstore configfs ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 btrfs blake2b_generic dm_crypt dm_mod efivarfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c crc32c_generic xor raid6_pq raid1 raid0 multipath linear md_mod amdgpu amdxcp i2c_algo_bit drm_ttm_helper ttm crc32_pclmul crc32c_intel drm_exec gpu_sched drm_suballoc_helper nvme ghash_clmulni_intel drm_buddy drm_display_helper sha512_ssse3 nvme_core ahci xhci_pci sha512_generic hid_generic xhci_hcd libahci rtsx_pci_sdmmc t10_pi i2c_hid_acpi drm_kms_helper i2c_hid mmc_core libata aesni_intel crc64_rocksoft_generic crypto_simd amd_sfh crc64_rocksoft scsi_mod usbcore cryptd crc_t10dif cec drm crct10dif_generic hid rtsx_pci crct10dif_pclmul scsi_common rc_core crc64 i2c_piix4 [ +0.000000] usb_common crct10dif_common video wmi [ +0.000000] CR2: 00000000000004c0 [ +0.000000] ---[ end trace 0000000000000000 ]--- Fixes: 0e859faf8670 ("drm/amd/display: Remove unwanted drm edid references") Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ef27dd71cbbc5..5853cf0229176 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6534,10 +6534,15 @@ amdgpu_dm_connector_late_register(struct drm_connector *connector) static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6545,7 +6550,7 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; @@ -6586,12 +6591,18 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base; - struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(&aconnector->base); + struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; /* * Note: drm_get_edid gets edid in the following order: @@ -6599,7 +6610,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) * 2) firmware EDID if set via edid_firmware module parameter * 3) regular DDC read. */ - edid = drm_get_edid(connector, &amdgpu_connector->ddc_bus->aux.ddc); + edid = drm_get_edid(connector, ddc); if (!edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); return; -- GitLab From bbfaf2aea7164db59739728d62d9cc91d64ff856 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Wed, 21 Feb 2024 17:16:49 +0800 Subject: [PATCH 1371/1405] drm/amdgpu: Fix the runtime resume failure issue Don't set power state flag when system enter runtime suspend, or it may cause runtime resume failure issue. Fixes: 3a9626c816db ("drm/amd: Stop evicting resources on APUs in suspend") Signed-off-by: Ma Jun Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index cc21ed67a3307..7099ff9cf8c50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1528,6 +1528,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) */ void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { + if (adev->in_runpm) + return; + if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) -- GitLab From 1d492944d3d06047793fa2e7606868f6d7480f87 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 14 Feb 2024 14:06:32 +1000 Subject: [PATCH 1372/1405] nouveau/gsp: add kconfig option to enable GSP paths by default Turing and Ampere will continue to use the old paths by default, but we should allow distros to decide what the policy is. Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20240214040632.661069-1-airlied@gmail.com --- drivers/gpu/drm/nouveau/Kconfig | 8 ++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 1e6aaf95ff7c7..ceef470c9fbfc 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM help Say Y here if you want to enable experimental support for Shared Virtual Memory (SVM). + +config DRM_NOUVEAU_GSP_DEFAULT + bool "Use GSP firmware for Turing/Ampere (needs firmware installed)" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to use the GSP codepaths by default on + Turing and Ampere GPUs. diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index a41735ab60683..a64c813856821 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -2312,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif) { struct nvkm_subdev *subdev = &gsp->subdev; int ret; + bool enable_gsp = fwif->enable; - if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable)) +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT) + enable_gsp = true; +#endif + if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp)) return -EINVAL; if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) || -- GitLab From 3f4d8aac6e768c2215ce68275256971c2f54f0c8 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jan 2024 13:50:58 +1000 Subject: [PATCH 1373/1405] nouveau: add an ioctl to return vram bar size. This returns the BAR resources size so userspace can make decisions based on rebar support. userspace using this has been proposed for nvk, but it's a rather trivial uapi addition. Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 4 ++++ include/uapi/drm/nouveau_drm.h | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index d1bb8151a1df5..4cb323bc32330 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -199,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_drm *drm = nouveau_drm(dev); struct nvif_device *device = &drm->client.device; + struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device); struct nvkm_gr *gr = nvxx_gr(device); struct drm_nouveau_getparam *getparam = data; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -263,6 +264,9 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); break; } + case NOUVEAU_GETPARAM_VRAM_BAR_SIZE: + getparam->value = nvkm_device->func->resource_size(nvkm_device, 1); + break; default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 0bade1592f34f..10a917639d8d3 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -54,6 +54,13 @@ extern "C" { */ #define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 +/* + * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size + * + * Query the VRAM BAR size. + */ +#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- GitLab From 72fa02fdf83306c52bc1eede28359e3fa32a151a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jan 2024 14:24:25 +1000 Subject: [PATCH 1374/1405] nouveau: add an ioctl to report vram usage This reports the currently used vram allocations. userspace using this has been proposed for nvk, but it's a rather trivial uapi addition. Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 5 +++++ include/uapi/drm/nouveau_drm.h | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 4cb323bc32330..cd14f993bdd1b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -267,6 +267,11 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) case NOUVEAU_GETPARAM_VRAM_BAR_SIZE: getparam->value = nvkm_device->func->resource_size(nvkm_device, 1); break; + case NOUVEAU_GETPARAM_VRAM_USED: { + struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr) << PAGE_SHIFT; + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 10a917639d8d3..77d7ff0d5b110 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -61,6 +61,13 @@ extern "C" { */ #define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 +/* + * NOUVEAU_GETPARAM_VRAM_USED + * + * Get remaining VRAM size. + */ +#define NOUVEAU_GETPARAM_VRAM_USED 19 + struct drm_nouveau_getparam { __u64 param; __u64 value; -- GitLab From 1001db6c42e4012b55e5ee19405490f23e033b5a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1375/1405] LoongArch: Disable IRQ before init_fn() for nonboot CPUs Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to silence such warnings (and also avoid potential errors due to unexpected interrupts): WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 ra: 90000000047bd56c tlb_init+0x24c/0x528 ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000000 (PPLV0 -PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071000 (LIE=12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 9000000006456888 9000000006456880 900000010039f950 0000000000000001 0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 9000000006366000 900000000619a140 0000000000000000 0000000000000004 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 ... Call Trace: [<90000000047a4828>] show_stack+0x48/0x1a0 [<9000000005b61874>] dump_stack_lvl+0x84/0xcc [<90000000047f60ac>] __warn+0x8c/0x1e0 [<9000000005b0ab34>] report_bug+0x1b4/0x280 [<9000000005b63110>] do_bp+0x2d0/0x480 [<90000000047a2e20>] handle_bp+0x120/0x1c0 [<90000000048e3334>] rcu_cpu_starting+0x214/0x280 [<90000000047bd568>] tlb_init+0x248/0x528 [<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 [<90000000047a19f4>] cpu_probe+0x494/0xa00 [<90000000047b551c>] start_secondary+0x3c/0xc0 [<9000000005b66134>] smpboot_entry+0x50/0x58 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 2b49d30eb7c01..87b7190fe48e1 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -337,6 +337,7 @@ void __noreturn arch_cpu_idle_dead(void) addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0); } while (addr == 0); + local_irq_disable(); init_fn = (void *)TO_CACHE(addr); iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR); -- GitLab From 752cd08da320a667a833803a8fd6bb266114cce5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1376/1405] LoongArch: Update cpu_sibling_map when disabling nonboot CPUs Update cpu_sibling_map when disabling nonboot CPUs by defining & calling clear_cpu_sibling_map(), otherwise we get such errors on SMT systems: jump label: negative count! WARNING: CPU: 6 PID: 45 at kernel/jump_label.c:263 __static_key_slow_dec_cpuslocked+0xec/0x100 CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 pc 90000000004c302c ra 90000000004c302c tp 90000001005bc000 sp 90000001005bfd20 a0 000000000000001b a1 900000000224c278 a2 90000001005bfb58 a3 900000000224c280 a4 900000000224c278 a5 90000001005bfb50 a6 0000000000000001 a7 0000000000000001 t0 ce87a4763eb5234a t1 ce87a4763eb5234a t2 0000000000000000 t3 0000000000000000 t4 0000000000000006 t5 0000000000000000 t6 0000000000000064 t7 0000000000001964 t8 000000000009ebf6 u0 9000000001f2a068 s9 0000000000000000 s0 900000000246a2d8 s1 ffffffffffffffff s2 ffffffffffffffff s3 90000000021518c0 s4 0000000000000040 s5 9000000002151058 s6 9000000009828e40 s7 00000000000000b4 s8 0000000000000006 ra: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 ERA: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1c (LIE=2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 Stack : 0000000000000000 900000000203f258 900000000179afc8 90000001005bc000 90000001005bf980 0000000000000000 90000001005bf988 9000000001fe0be0 900000000224c280 900000000224c278 90000001005bf8c0 0000000000000001 0000000000000001 ce87a4763eb5234a 0000000007f38000 90000001003f8cc0 0000000000000000 0000000000000006 0000000000000000 4c206e6f73676e6f 6f4c203a656d616e 000000000009ec99 0000000007f38000 0000000000000000 900000000214b000 9000000001fe0be0 0000000000000004 0000000000000000 0000000000000107 0000000000000009 ffffffffffafdabe 00000000000000b4 0000000000000006 90000000004c302c 9000000000224528 00005555939a0c7c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c ... Call Trace: [<9000000000224528>] show_stack+0x48/0x1a0 [<900000000179afc8>] dump_stack_lvl+0x78/0xa0 [<9000000000263ed0>] __warn+0x90/0x1a0 [<90000000017419b8>] report_bug+0x1b8/0x280 [<900000000179c564>] do_bp+0x264/0x420 [<90000000004c302c>] __static_key_slow_dec_cpuslocked+0xec/0x100 [<90000000002b4d7c>] sched_cpu_deactivate+0x2fc/0x300 [<9000000000266498>] cpuhp_invoke_callback+0x178/0x8a0 [<9000000000267f70>] cpuhp_thread_fun+0xf0/0x240 [<90000000002a117c>] smpboot_thread_fn+0x1dc/0x2e0 [<900000000029a720>] kthread+0x140/0x160 [<9000000000222288>] ret_from_kernel_thread+0xc/0xa4 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 121 ++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 87b7190fe48e1..aabee0b280fe5 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -88,6 +88,73 @@ void show_ipi_list(struct seq_file *p, int prec) } } +static inline void set_cpu_core_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_core_setup_map); + + for_each_cpu(i, &cpu_core_setup_map) { + if (cpu_data[cpu].package == cpu_data[i].package) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + } + } +} + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_sibling_setup_map); + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); + } + } +} + +static inline void clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_clear_cpu(i, &cpu_sibling_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_sibling_map[i]); + } + } + + cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); +} + +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpus_are_siblings(i, k)) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + for_each_online_cpu(i) + cpumask_andnot(&cpu_foreign_map[i], + &temp_foreign_map, &cpu_sibling_map[i]); +} + /* Send mailbox buffer via Mail_Send */ static void csr_mail_send(uint64_t data, int cpu, int mailbox) { @@ -303,6 +370,7 @@ int loongson_cpu_disable(void) numa_remove_cpu(cpu); #endif set_cpu_online(cpu, false); + clear_cpu_sibling_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); irq_migrate_all_off_this_cpu(); @@ -380,59 +448,6 @@ static int __init ipi_pm_init(void) core_initcall(ipi_pm_init); #endif -static inline void set_cpu_sibling_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - - for_each_cpu(i, &cpu_sibling_setup_map) { - if (cpus_are_siblings(cpu, i)) { - cpumask_set_cpu(i, &cpu_sibling_map[cpu]); - cpumask_set_cpu(cpu, &cpu_sibling_map[i]); - } - } -} - -static inline void set_cpu_core_map(int cpu) -{ - int i; - - cpumask_set_cpu(cpu, &cpu_core_setup_map); - - for_each_cpu(i, &cpu_core_setup_map) { - if (cpu_data[cpu].package == cpu_data[i].package) { - cpumask_set_cpu(i, &cpu_core_map[cpu]); - cpumask_set_cpu(cpu, &cpu_core_map[i]); - } - } -} - -/* - * Calculate a new cpu_foreign_map mask whenever a - * new cpu appears or disappears. - */ -void calculate_cpu_foreign_map(void) -{ - int i, k, core_present; - cpumask_t temp_foreign_map; - - /* Re-calculate the mask */ - cpumask_clear(&temp_foreign_map); - for_each_online_cpu(i) { - core_present = 0; - for_each_cpu(k, &temp_foreign_map) - if (cpus_are_siblings(i, k)) - core_present = 1; - if (!core_present) - cpumask_set_cpu(i, &temp_foreign_map); - } - - for_each_online_cpu(i) - cpumask_andnot(&cpu_foreign_map[i], - &temp_foreign_map, &cpu_sibling_map[i]); -} - /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { -- GitLab From 9fa304b9f8ec440e614af6d35826110c633c4074 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1377/1405] LoongArch: Call early_init_fdt_scan_reserved_mem() earlier The unflatten_and_copy_device_tree() function contains a call to memblock_alloc(). This means that memblock is allocating memory before any of the reserved memory regions are set aside in the arch_mem_init() function which calls early_init_fdt_scan_reserved_mem(). Therefore, there is a possibility for memblock to allocate from any of the reserved memory regions. Hence, move the call to early_init_fdt_scan_reserved_mem() to be earlier in the init sequence, so that the reserved memory regions are set aside before any allocations are done using memblock. Cc: stable@vger.kernel.org Fixes: 88d4d957edc707e ("LoongArch: Add FDT booting support from efi system table") Signed-off-by: Oreoluwa Babatunde Signed-off-by: Huacai Chen --- arch/loongarch/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index edf2bba801306..634ef17fd38bf 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -357,6 +357,8 @@ void __init platform_init(void) acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); #endif + + early_init_fdt_scan_reserved_mem(); unflatten_and_copy_device_tree(); #ifdef CONFIG_NUMA @@ -390,8 +392,6 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); - early_init_fdt_scan_reserved_mem(); - /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate -- GitLab From f661ca40787396a3b7f324ae2a215bd67cc92955 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1378/1405] LoongArch: dts: Minor whitespace cleanup The DTS code coding style expects exactly one space before '{' character. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Huacai Chen --- arch/loongarch/boot/dts/loongson-2k0500-ref.dts | 2 +- arch/loongarch/boot/dts/loongson-2k1000-ref.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts index b38071a4d0b02..8aefb0c126722 100644 --- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts @@ -60,7 +60,7 @@ #address-cells = <1>; #size-cells = <0>; - eeprom@57{ + eeprom@57 { compatible = "atmel,24c16"; reg = <0x57>; pagesize = <16>; diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts index 132a2d1ea8bce..ed4d324340411 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts @@ -78,7 +78,7 @@ #address-cells = <1>; #size-cells = <0>; - eeprom@57{ + eeprom@57 { compatible = "atmel,24c16"; reg = <0x57>; pagesize = <16>; -- GitLab From 179af5751af59100305358ee0ee51eec9a7f3953 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1379/1405] LoongArch: KVM: Fix input validation of _kvm_get_cpucfg() & kvm_check_cpucfg() The range check for the CPUCFG ID is wrong (should have been a || instead of &&) and useless in effect, so fix the obvious mistake. Furthermore, the juggling of the temp return value is unnecessary, because it is semantically equivalent and more readable to just return at every switch case's end. This is done too to avoid potential bugs in the future related to the unwanted complexity. Also, the return value of _kvm_get_cpucfg is meant to be checked, but this was not done, so bad CPUCFG IDs wrongly fall back to the default case and 0 is incorrectly returned; check the return value to fix the UAPI behavior. While at it, also remove the redundant range check in kvm_check_cpucfg, because out-of-range CPUCFG IDs are already rejected by the -EINVAL as returned by _kvm_get_cpucfg(). Fixes: db1ecca22edf ("LoongArch: KVM: Add LSX (128bit SIMD) support") Fixes: 118e10cd893d ("LoongArch: KVM: Add LASX (256bit SIMD) support") Reviewed-by: Bibo Mao Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 27701991886dd..c8452aa5c11a7 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -300,9 +300,7 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) static int _kvm_get_cpucfg(int id, u64 *v) { - int ret = 0; - - if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + if (id < 0 || id >= KVM_MAX_CPUCFG_REGS) return -EINVAL; switch (id) { @@ -324,32 +322,35 @@ static int _kvm_get_cpucfg(int id, u64 *v) if (cpu_has_lasx) *v |= CPUCFG2_LASX; - break; + return 0; default: - ret = -EINVAL; - break; + /* + * No restrictions on other valid CPUCFG IDs' values, but + * CPUCFG data is limited to 32 bits as the LoongArch ISA + * manual says (Volume 1, Section 2.2.10.5 "CPUCFG"). + */ + *v = U32_MAX; + return 0; } - return ret; } static int kvm_check_cpucfg(int id, u64 val) { - u64 mask; - int ret = 0; - - if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) - return -EINVAL; + int ret; + u64 mask = 0; - if (_kvm_get_cpucfg(id, &mask)) + ret = _kvm_get_cpucfg(id, &mask); + if (ret) return ret; + if (val & ~mask) + /* Unsupported features and/or the higher 32 bits should not be set */ + return -EINVAL; + switch (id) { case 2: /* CPUCFG2 features checking */ - if (val & ~mask) - /* The unsupported features should not be set */ - ret = -EINVAL; - else if (!(val & CPUCFG2_LLFTP)) + if (!(val & CPUCFG2_LLFTP)) /* The LLFTP must be set, as guest must has a constant timer */ ret = -EINVAL; else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) -- GitLab From ec83f39d2b078d6dd029bbde601835b5368fc886 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1380/1405] LoongArch: KVM: Rename _kvm_get_cpucfg() to _kvm_get_cpucfg_mask() The function is not actually a getter of guest CPUCFG, but rather validation of the input CPUCFG ID plus information about the supported bit flags of that CPUCFG leaf. So rename it to avoid confusion. Reviewed-by: Bibo Mao Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index c8452aa5c11a7..98c4290af9c49 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -298,7 +298,7 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) return ret; } -static int _kvm_get_cpucfg(int id, u64 *v) +static int _kvm_get_cpucfg_mask(int id, u64 *v) { if (id < 0 || id >= KVM_MAX_CPUCFG_REGS) return -EINVAL; @@ -339,7 +339,7 @@ static int kvm_check_cpucfg(int id, u64 val) int ret; u64 mask = 0; - ret = _kvm_get_cpucfg(id, &mask); + ret = _kvm_get_cpucfg_mask(id, &mask); if (ret) return ret; @@ -567,7 +567,7 @@ static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, uint64_t val; uint64_t __user *uaddr = (uint64_t __user *)attr->addr; - ret = _kvm_get_cpucfg(attr->attr, &val); + ret = _kvm_get_cpucfg_mask(attr->attr, &val); if (ret) return ret; -- GitLab From f0f5c4894f89bac9074b45bccc447c3659a0fa6f Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 23 Feb 2024 14:36:31 +0800 Subject: [PATCH 1381/1405] LoongArch: KVM: Streamline kvm_check_cpucfg() and improve comments All the checks currently done in kvm_check_cpucfg can be realized with early returns, so just do that to avoid extra cognitive burden related to the return value handling. While at it, clean up comments of _kvm_get_cpucfg_mask() and kvm_check_cpucfg(), by removing comments that are merely restatement of the code nearby, and paraphrasing the rest so they read more natural for English speakers (that likely are not familiar with the actual Chinese- influenced grammar). No functional changes intended. Reviewed-by: Bibo Mao Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 98c4290af9c49..36106922b5d75 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -305,20 +305,16 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v) switch (id) { case 2: - /* Return CPUCFG2 features which have been supported by KVM */ + /* CPUCFG2 features unconditionally supported by KVM */ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | CPUCFG2_LAM; /* - * If LSX is supported by CPU, it is also supported by KVM, - * as we implement it. + * For the ISA extensions listed below, if one is supported + * by the host, then it is also supported by KVM. */ if (cpu_has_lsx) *v |= CPUCFG2_LSX; - /* - * if LASX is supported by CPU, it is also supported by KVM, - * as we implement it. - */ if (cpu_has_lasx) *v |= CPUCFG2_LASX; @@ -349,24 +345,26 @@ static int kvm_check_cpucfg(int id, u64 val) switch (id) { case 2: - /* CPUCFG2 features checking */ if (!(val & CPUCFG2_LLFTP)) - /* The LLFTP must be set, as guest must has a constant timer */ - ret = -EINVAL; - else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) - /* Single and double float point must both be set when enable FP */ - ret = -EINVAL; - else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) - /* FP should be set when enable LSX */ - ret = -EINVAL; - else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) - /* LSX, FP should be set when enable LASX, and FP has been checked before. */ - ret = -EINVAL; - break; + /* Guests must have a constant timer */ + return -EINVAL; + if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) + /* Single and double float point must both be set when FP is enabled */ + return -EINVAL; + if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) + /* LSX architecturally implies FP but val does not satisfy that */ + return -EINVAL; + if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) + /* LASX architecturally implies LSX and FP but val does not satisfy that */ + return -EINVAL; + return 0; default: - break; + /* + * Values for the other CPUCFG IDs are not being further validated + * besides the mask check above. + */ + return 0; } - return ret; } static int kvm_get_one_reg(struct kvm_vcpu *vcpu, -- GitLab From 65d4418c5002ec5b0e529455bf4152fd43459079 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 22 Feb 2024 10:07:41 -0400 Subject: [PATCH 1382/1405] iommu/sva: Restore SVA handle sharing Prior to commit 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") the code allowed a SVA handle to be bound multiple times to the same (mm, device) pair. This was alluded to in the kdoc comment, but we had understood this to be more a remark about allowing multiple devices, not a literal same-driver re-opening the same SVA. It turns out uacce and idxd were both relying on the core code to handle reference counting for same-device same-mm scenarios. As this looks hard to resolve in the drivers bring it back to the core code. The new design has changed the meaning of the domain->users refcount to refer to the number of devices that are sharing that domain for the same mm. This is part of the design to lift the SVA domain de-duplication out of the drivers. Return the old behavior by explicitly de-duplicating the struct iommu_sva handle. The same (mm, device) will return the same handle pointer and the core code will handle tracking this. The last unbind of the handle will destroy it. Fixes: 092edaddb660 ("iommu: Support mm PASID 1:n with sva domains") Reported-by: Zhangfei Gao Closes: https://lore.kernel.org/all/20240221110658.529-1-zhangfei.gao@linaro.org/ Tested-by: Zhangfei Gao Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/0-v1-9455fc497a6f+3b4-iommu_sva_sharing_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 17 +++++++++++++++++ include/linux/iommu.h | 3 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index c3fc9201d0be9..7f91c8d0064b7 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -41,6 +41,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); + INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -82,6 +83,14 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } + list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { + if (handle->dev == dev) { + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + } + handle = kzalloc(sizeof(*handle), GFP_KERNEL); if (!handle) { ret = -ENOMEM; @@ -108,7 +117,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm if (ret) goto out_free_domain; domain->users = 1; + refcount_set(&handle->users, 1); list_add(&domain->next, &mm->iommu_mm->sva_domains); + list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); out: mutex_unlock(&iommu_sva_lock); @@ -141,6 +152,12 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) struct device *dev = handle->dev; mutex_lock(&iommu_sva_lock); + if (!refcount_dec_and_test(&handle->users)) { + mutex_unlock(&iommu_sva_lock); + return; + } + list_del(&handle->handle_item); + iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { list_del(&domain->next); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1ea2a820e1eb0..5e27cb3a3be99 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -892,11 +892,14 @@ struct iommu_fwspec { struct iommu_sva { struct device *dev; struct iommu_domain *domain; + struct list_head handle_item; + refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; + struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, -- GitLab From 87aec499368d488c20292952d6d4be7cb9e49c5e Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 21 Feb 2024 20:27:13 +0100 Subject: [PATCH 1383/1405] i2c: imx: when being a target, mark the last read as processed When being a target, NAK from the controller means that all bytes have been transferred. So, the last byte needs also to be marked as 'processed'. Otherwise index registers of backends may not increase. Fixes: f7414cd6923f ("i2c: imx: support slave mode for imx I2C driver") Signed-off-by: Corey Minyard Tested-by: Andrew Manley Reviewed-by: Andrew Manley Reviewed-by: Oleksij Rempel [wsa: fixed comment and commit message to properly describe the case] Signed-off-by: Wolfram Sang Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 88a053987403c..60e813137f844 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -803,6 +803,11 @@ static irqreturn_t i2c_imx_slave_handle(struct imx_i2c_struct *i2c_imx, ctl &= ~I2CR_MTX; imx_i2c_write_reg(ctl, i2c_imx, IMX_I2C_I2CR); imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR); + + /* flag the last byte as processed */ + i2c_imx_slave_event(i2c_imx, + I2C_SLAVE_READ_PROCESSED, &value); + i2c_imx_slave_finish_op(i2c_imx); return IRQ_HANDLED; } -- GitLab From 66ad2fbcdbeab0edfd40c5d94f32f053b98c2320 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 24 Feb 2024 14:48:03 +0100 Subject: [PATCH 1384/1405] dm-integrity, dm-verity: reduce stack usage for recheck The newly added integrity_recheck() function has another larger stack allocation, just like its caller integrity_metadata(). When it gets inlined, the combination of the two exceeds the warning limit for 32-bit architectures and possibly risks an overflow when this is called from a deep call chain through a file system: drivers/md/dm-integrity.c:1767:13: error: stack frame size (1048) exceeds limit (1024) in 'integrity_metadata' [-Werror,-Wframe-larger-than] 1767 | static void integrity_metadata(struct work_struct *w) Since the caller at this point is done using its checksum buffer, just reuse the same buffer in the new function to avoid the double allocation. [Mikulas: add "noinline" to integrity_recheck and verity_recheck. These functions are only called on error, so they shouldn't bloat the stack frame or code size of the caller.] Fixes: c88f5e553fe3 ("dm-integrity: recheck the integrity tag after a failure") Fixes: 9177f3c0dea6 ("dm-verity: recheck the hash after a failure") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 10 ++++------ drivers/md/dm-verity-target.c | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 44a1dcc06dd73..1fc901df84eb1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1691,14 +1691,13 @@ static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector get_random_bytes(result, ic->tag_size); } -static void integrity_recheck(struct dm_integrity_io *dio) +static noinline void integrity_recheck(struct dm_integrity_io *dio, char *checksum) { struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); struct dm_integrity_c *ic = dio->ic; struct bvec_iter iter; struct bio_vec bv; sector_t sector, logical_sector, area, offset; - char checksum_onstack[max_t(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)]; struct page *page; void *buffer; @@ -1734,9 +1733,8 @@ static void integrity_recheck(struct dm_integrity_io *dio) goto free_ret; } - integrity_sector_checksum(ic, logical_sector, buffer, - checksum_onstack); - r = dm_integrity_rw_tag(ic, checksum_onstack, &dio->metadata_block, + integrity_sector_checksum(ic, logical_sector, buffer, checksum); + r = dm_integrity_rw_tag(ic, checksum, &dio->metadata_block, &dio->metadata_offset, ic->tag_size, TAG_CMP); if (r) { if (r > 0) { @@ -1851,7 +1849,7 @@ static void integrity_metadata(struct work_struct *w) checksums_ptr - checksums, dio->op == REQ_OP_READ ? TAG_CMP : TAG_WRITE); if (unlikely(r)) { if (r > 0) { - integrity_recheck(dio); + integrity_recheck(dio, checksums); goto skip_io; } if (likely(checksums != checksums_onstack)) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 06c8b637849c9..1b591bfa90d5d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -491,8 +491,8 @@ static int verity_recheck_copy(struct dm_verity *v, struct dm_verity_io *io, return 0; } -static int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, - struct bvec_iter start, sector_t cur_block) +static noinline int verity_recheck(struct dm_verity *v, struct dm_verity_io *io, + struct bvec_iter start, sector_t cur_block) { struct page *page; void *buffer; -- GitLab From 1f626223a0c8753f8f5c651bf7bfc9e3cfdef7f7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 20 Feb 2024 22:16:00 -0500 Subject: [PATCH 1385/1405] bcachefs: fix backpointer_to_text() when dev does not exist Fixes: Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index b4dc319bcb2bc..569b97904da42 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -68,9 +68,11 @@ void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - prt_str(out, "bucket="); - bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); - prt_str(out, " "); + if (bch2_dev_exists2(c, k.k->p.inode)) { + prt_str(out, "bucket="); + bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); + prt_str(out, " "); + } bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); } -- GitLab From 04fee68dd99a53dbf0716e99270b66da26519daf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 22 Feb 2024 21:39:13 -0500 Subject: [PATCH 1386/1405] bcachefs: Kill __GFP_NOFAIL in buffered read path Recently, we fixed our __GFP_NOFAIL usage in the readahead path, but the easy one in read_single_folio() (where wa can return an error) was missed - oops. Fixes: Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 73c12e565af50..27710cdd5710e 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -303,18 +303,6 @@ void bch2_readahead(struct readahead_control *ractl) darray_exit(&readpages_iter.folios); } -static void __bchfs_readfolio(struct bch_fs *c, struct bch_read_bio *rbio, - subvol_inum inum, struct folio *folio) -{ - bch2_folio_create(folio, __GFP_NOFAIL); - - rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; - rbio->bio.bi_iter.bi_sector = folio_sector(folio); - BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); - - bch2_trans_run(c, (bchfs_read(trans, rbio, inum, NULL), 0)); -} - static void bch2_read_single_folio_end_io(struct bio *bio) { complete(bio->bi_private); @@ -329,6 +317,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) int ret; DECLARE_COMPLETION_ONSTACK(done); + if (!bch2_folio_create(folio, GFP_KERNEL)) + return -ENOMEM; + bch2_inode_opts_get(&opts, c, &inode->ei_inode); rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read), @@ -336,7 +327,11 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) rbio->bio.bi_private = &done; rbio->bio.bi_end_io = bch2_read_single_folio_end_io; - __bchfs_readfolio(c, rbio, inode_inum(inode), folio); + rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC; + rbio->bio.bi_iter.bi_sector = folio_sector(folio); + BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + + bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); wait_for_completion(&done); ret = blk_status_to_errno(rbio->bio.bi_status); -- GitLab From 204f45140faa0772d2ca1b3de96d1c0fb3db8e77 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 24 Feb 2024 19:14:36 -0500 Subject: [PATCH 1387/1405] bcachefs: Fix BTREE_ITER_FILTER_SNAPSHOTS on inodes btree If we're in FILTER_SNAPSHOTS mode and we start scanning a range of the keyspace where no keys are visible in the current snapshot, we have a problem - we'll scan for a very long time before scanning terminates. Awhile back, this was fixed for most cases with peek_upto() (and assertions that enforce that it's being used). But the fix missed the fact that the inodes btree is different - every key offset is in a different snapshot tree, not just the inode field. Fixes: Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5467a8635be11..3ef338df82f5e 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2156,7 +2156,9 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e * isn't monotonically increasing before FILTER_SNAPSHOTS, and * that's what we check against in extents mode: */ - if (k.k->p.inode > end.inode) + if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_gt(k.k->p, end) + : k.k->p.inode > end.inode)) goto end; if (iter->update_path && -- GitLab From b58b1b883b9b702e25204dbe2b221eecc8ecd159 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 15 Feb 2024 12:16:05 -0500 Subject: [PATCH 1388/1405] bcachefs: fix iov_iter count underflow on sub-block dio read bch2_direct_IO_read() checks the request offset and size for sector alignment and then falls through to a couple calculations to shrink the size of the request based on the inode size. The problem is that these checks round up to the fs block size, which runs the risk of underflowing iter->count if the block size happens to be large enough. This is triggered by fstest generic/361 with a 4k block size, which subsequently leads to a crash. To avoid this crash, check that the shorten length doesn't exceed the overall length of the iter. Fixes: Signed-off-by: Brian Foster Reviewed-by: Su Yue Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-direct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index e3b219e19e100..33cb6da3a5ad2 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -88,6 +88,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) return ret; shorten = iov_iter_count(iter) - round_up(ret, block_bytes(c)); + if (shorten >= iter->count) + shorten = 0; iter->count -= shorten; bio = bio_alloc_bioset(NULL, -- GitLab From 097471f9e458dbbe41e25394c1fb1ccd751f0bee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 17 Feb 2024 20:38:47 -0500 Subject: [PATCH 1389/1405] bcachefs: Fix bch2_journal_flush_device_pins() If a journal write errored, the list of devices it was written to could be empty - we're not supposed to mark an empty replicas list. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 2cf626315652c..c33dca641575d 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -892,9 +892,11 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) journal_seq_pin(j, seq)->devs); seq++; - spin_unlock(&j->lock); - ret = bch2_mark_replicas(c, &replicas.e); - spin_lock(&j->lock); + if (replicas.e.nr_devs) { + spin_unlock(&j->lock); + ret = bch2_mark_replicas(c, &replicas.e); + spin_lock(&j->lock); + } } spin_unlock(&j->lock); err: -- GitLab From c4333eb541b92d91be57f757dccf6d4368516746 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 24 Feb 2024 01:18:45 -0500 Subject: [PATCH 1390/1405] bcachefs: Fix check_snapshot() memcpy check_snapshot() copies the bch_snapshot to a temporary to easily handle older versions that don't have all the fields of the current version, but it lacked a min() to correctly handle keys newer and larger than the current version. Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 45f67e8b29eb6..ac6ba04d55217 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -728,7 +728,7 @@ static int check_snapshot(struct btree_trans *trans, return 0; memset(&s, 0, sizeof(s)); - memcpy(&s, k.v, bkey_val_bytes(k.k)); + memcpy(&s, k.v, min(sizeof(s), bkey_val_bytes(k.k))); id = le32_to_cpu(s.parent); if (id) { -- GitLab From 583340de1d8b2d6a474eccd5e7d9f7f42f061e1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Feb 2024 21:10:01 -0500 Subject: [PATCH 1391/1405] fs/super.c: don't drop ->s_user_ns until we free struct super_block itself Avoids fun races in RCU pathwalk... Same goes for freeing LSM shite hanging off super_block's arse. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/super.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/super.c b/fs/super.c index d35e852954892..d6efeba0d0ce7 100644 --- a/fs/super.c +++ b/fs/super.c @@ -274,9 +274,10 @@ static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, destroy_work); - int i; - - for (i = 0; i < SB_FREEZE_LEVELS; i++) + security_sb_free(s); + put_user_ns(s->s_user_ns); + kfree(s->s_subtype); + for (int i = 0; i < SB_FREEZE_LEVELS; i++) percpu_free_rwsem(&s->s_writers.rw_sem[i]); kfree(s); } @@ -296,9 +297,6 @@ static void destroy_unused_super(struct super_block *s) super_unlock_excl(s); list_lru_destroy(&s->s_dentry_lru); list_lru_destroy(&s->s_inode_lru); - security_sb_free(s); - put_user_ns(s->s_user_ns); - kfree(s->s_subtype); shrinker_free(s->s_shrink); /* no delays needed */ destroy_super_work(&s->destroy_work); @@ -409,9 +407,6 @@ static void __put_super(struct super_block *s) WARN_ON(s->s_dentry_lru.node); WARN_ON(s->s_inode_lru.node); WARN_ON(!list_empty(&s->s_mounts)); - security_sb_free(s); - put_user_ns(s->s_user_ns); - kfree(s->s_subtype); call_rcu(&s->rcu, destroy_super_rcu); } } -- GitLab From cdb67fdeed72248475b1c849699495ef290a1634 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2023 21:11:41 -0400 Subject: [PATCH 1392/1405] rcu pathwalk: prevent bogus hard errors from may_lookup() If lazy call of ->permission() returns a hard error, check that try_to_unlazy() succeeds before returning it. That both makes life easier for ->permission() instances and closes the race in ENOTDIR handling - it is possible that positive d_can_lookup() seen in link_path_walk() applies to the state *after* unlink() + mkdir(), while nd->inode matches the state prior to that. Normally seeing e.g. EACCES from permission check in rcu pathwalk means that with some timings non-rcu pathwalk would've run into the same; however, running into a non-executable regular file in the middle of a pathname would not get to permission check - it would fail with ENOTDIR instead. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 4e0de939fea12..9342fa6a38c2b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1717,7 +1717,11 @@ static inline int may_lookup(struct mnt_idmap *idmap, { if (nd->flags & LOOKUP_RCU) { int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (err != -ECHILD || !try_to_unlazy(nd)) + if (!err) // success, keep going + return 0; + if (!try_to_unlazy(nd)) + return -ECHILD; // redo it all non-lazy + if (err != -ECHILD) // hard error return err; } return inode_permission(idmap, nd->inode, MAY_EXEC); -- GitLab From 529f89a9e4531e80c44871d7d0c30df6540c20e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2023 19:36:07 -0400 Subject: [PATCH 1393/1405] affs: free affs_sb_info with kfree_rcu() one of the flags in it is used by ->d_hash()/->d_compare() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/affs/affs.h | 1 + fs/affs/super.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 60685ec76d983..2e612834329ac 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -105,6 +105,7 @@ struct affs_sb_info { int work_queued; /* non-zero delayed work is queued */ struct delayed_work sb_work; /* superblock flush delayed work */ spinlock_t work_lock; /* protects sb_work and work_queued */ + struct rcu_head rcu; }; #define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */ diff --git a/fs/affs/super.c b/fs/affs/super.c index 58b391446ae1f..b56a95cf414a4 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -640,7 +640,7 @@ static void affs_kill_sb(struct super_block *sb) affs_brelse(sbi->s_root_bh); kfree(sbi->s_prefix); mutex_destroy(&sbi->s_bmlock); - kfree(sbi); + kfree_rcu(sbi, rcu); } } -- GitLab From a13d1a4de3b0fe3c41d818697d691c886c5585fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2023 15:53:32 -0400 Subject: [PATCH 1394/1405] exfat: move freeing sbi, upcase table and dropping nls into rcu-delayed helper That stuff can be accessed by ->d_hash()/->d_compare(); as it is, we have a hard-to-hit UAF if rcu pathwalk manages to get into ->d_hash() on a filesystem that is in process of getting shut down. Besides, having nls and upcase table cleanup moved from ->put_super() towards the place where sbi is freed makes for simpler failure exits. Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/exfat/exfat_fs.h | 1 + fs/exfat/nls.c | 14 ++++---------- fs/exfat/super.c | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 9474cd50da6d4..361595433480c 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -275,6 +275,7 @@ struct exfat_sb_info { spinlock_t inode_hash_lock; struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; + struct rcu_head rcu; }; #define EXFAT_CACHE_VALID 0 diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c index 705710f93e2dd..afdf13c34ff52 100644 --- a/fs/exfat/nls.c +++ b/fs/exfat/nls.c @@ -655,7 +655,6 @@ static int exfat_load_upcase_table(struct super_block *sb, unsigned int sect_size = sb->s_blocksize; unsigned int i, index = 0; u32 chksum = 0; - int ret; unsigned char skip = false; unsigned short *upcase_table; @@ -673,8 +672,7 @@ static int exfat_load_upcase_table(struct super_block *sb, if (!bh) { exfat_err(sb, "failed to read sector(0x%llx)", (unsigned long long)sector); - ret = -EIO; - goto free_table; + return -EIO; } sector++; for (i = 0; i < sect_size && index <= 0xFFFF; i += 2) { @@ -701,15 +699,12 @@ static int exfat_load_upcase_table(struct super_block *sb, exfat_err(sb, "failed to load upcase table (idx : 0x%08x, chksum : 0x%08x, utbl_chksum : 0x%08x)", index, chksum, utbl_checksum); - ret = -EINVAL; -free_table: - exfat_free_upcase_table(sbi); - return ret; + return -EINVAL; } static int exfat_load_default_upcase_table(struct super_block *sb) { - int i, ret = -EIO; + int i; struct exfat_sb_info *sbi = EXFAT_SB(sb); unsigned char skip = false; unsigned short uni = 0, *upcase_table; @@ -740,8 +735,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb) return 0; /* FATAL error: default upcase table has error */ - exfat_free_upcase_table(sbi); - return ret; + return -EIO; } int exfat_create_upcase_table(struct super_block *sb) diff --git a/fs/exfat/super.c b/fs/exfat/super.c index d9d4fa91010bb..fcb6582677650 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -39,9 +39,6 @@ static void exfat_put_super(struct super_block *sb) exfat_free_bitmap(sbi); brelse(sbi->boot_bh); mutex_unlock(&sbi->s_lock); - - unload_nls(sbi->nls_io); - exfat_free_upcase_table(sbi); } static int exfat_sync_fs(struct super_block *sb, int wait) @@ -600,7 +597,7 @@ static int __exfat_fill_super(struct super_block *sb) ret = exfat_load_bitmap(sb); if (ret) { exfat_err(sb, "failed to load alloc-bitmap"); - goto free_upcase_table; + goto free_bh; } ret = exfat_count_used_clusters(sb, &sbi->used_clusters); @@ -613,8 +610,6 @@ static int __exfat_fill_super(struct super_block *sb) free_alloc_bitmap: exfat_free_bitmap(sbi); -free_upcase_table: - exfat_free_upcase_table(sbi); free_bh: brelse(sbi->boot_bh); return ret; @@ -701,12 +696,10 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_root = NULL; free_table: - exfat_free_upcase_table(sbi); exfat_free_bitmap(sbi); brelse(sbi->boot_bh); check_nls_io: - unload_nls(sbi->nls_io); return err; } @@ -771,13 +764,22 @@ static int exfat_init_fs_context(struct fs_context *fc) return 0; } +static void delayed_free(struct rcu_head *p) +{ + struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu); + + unload_nls(sbi->nls_io); + exfat_free_upcase_table(sbi); + exfat_free_sbi(sbi); +} + static void exfat_kill_sb(struct super_block *sb) { struct exfat_sb_info *sbi = sb->s_fs_info; kill_block_super(sb); if (sbi) - exfat_free_sbi(sbi); + call_rcu(&sbi->rcu, delayed_free); } static struct file_system_type exfat_fs_type = { -- GitLab From af072cf683acd2307e02378cfcf2502c49d2e127 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2023 20:18:59 -0400 Subject: [PATCH 1395/1405] hfsplus: switch to rcu-delayed unloading of nls and freeing ->s_fs_info ->d_hash() and ->d_compare() use those, so we need to delay freeing them. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/hfsplus/hfsplus_fs.h | 1 + fs/hfsplus/super.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 7ededcb720c12..012a3d003fbe6 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -190,6 +190,7 @@ struct hfsplus_sb_info { int work_queued; /* non-zero delayed work is queued */ struct delayed_work sync_work; /* FS sync delayed work */ spinlock_t work_lock; /* protects sync_work and work_queued */ + struct rcu_head rcu; }; #define HFSPLUS_SB_WRITEBACKUP 0 diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 1986b4f18a901..97920202790f9 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -277,6 +277,14 @@ void hfsplus_mark_mdb_dirty(struct super_block *sb) spin_unlock(&sbi->work_lock); } +static void delayed_free(struct rcu_head *p) +{ + struct hfsplus_sb_info *sbi = container_of(p, struct hfsplus_sb_info, rcu); + + unload_nls(sbi->nls); + kfree(sbi); +} + static void hfsplus_put_super(struct super_block *sb) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); @@ -302,9 +310,7 @@ static void hfsplus_put_super(struct super_block *sb) hfs_btree_close(sbi->ext_tree); kfree(sbi->s_vhdr_buf); kfree(sbi->s_backup_vhdr_buf); - unload_nls(sbi->nls); - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; + call_rcu(&sbi->rcu, delayed_free); } static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) -- GitLab From 275655d3207b9e65d1561bf21c06a622d9ec1d43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 29 Sep 2023 20:24:34 -0400 Subject: [PATCH 1396/1405] afs: fix __afs_break_callback() / afs_drop_open_mmap() race In __afs_break_callback() we might check ->cb_nr_mmap and if it's non-zero do queue_work(&vnode->cb_work). In afs_drop_open_mmap() we decrement ->cb_nr_mmap and do flush_work(&vnode->cb_work) if it reaches zero. The trouble is, there's nothing to prevent __afs_break_callback() from seeing ->cb_nr_mmap before the decrement and do queue_work() after both the decrement and flush_work(). If that happens, we might be in trouble - vnode might get freed before the queued work runs. __afs_break_callback() is always done under ->cb_lock, so let's make sure that ->cb_nr_mmap can change from non-zero to zero while holding ->cb_lock (the spinlock component of it - it's a seqlock and we don't need to mess with the counter). Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/afs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index 3d33b221d9ca2..ef2cc8f565d25 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -417,13 +417,17 @@ static void afs_add_open_mmap(struct afs_vnode *vnode) static void afs_drop_open_mmap(struct afs_vnode *vnode) { - if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) + if (atomic_add_unless(&vnode->cb_nr_mmap, -1, 1)) return; down_write(&vnode->volume->open_mmaps_lock); - if (atomic_read(&vnode->cb_nr_mmap) == 0) + read_seqlock_excl(&vnode->cb_lock); + // the only place where ->cb_nr_mmap may hit 0 + // see __afs_break_callback() for the other side... + if (atomic_dec_and_test(&vnode->cb_nr_mmap)) list_del_init(&vnode->cb_mmap_link); + read_sequnlock_excl(&vnode->cb_lock); up_write(&vnode->volume->open_mmaps_lock); flush_work(&vnode->cb_work); -- GitLab From 10a973fc4fb22390a8d362dd3265ec2c9a81d84c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2023 21:50:25 -0400 Subject: [PATCH 1397/1405] nfs: make nfs_set_verifier() safe for use in RCU pathwalk nfs_set_verifier() relies upon dentry being pinned; if that's the case, grabbing ->d_lock stabilizes ->d_parent and guarantees that ->d_parent points to a positive dentry. For something we'd run into in RCU mode that is *not* true - dentry might've been through dentry_kill() just as we grabbed ->d_lock, with its parent going through the same just as we get to into nfs_set_verifier_locked(). It might get to detaching inode (and zeroing ->d_inode) before nfs_set_verifier_locked() gets to fetching that; we get an oops as the result. That can happen in nfs{,4} ->d_revalidate(); the call chain in question is nfs_set_verifier_locked() <- nfs_set_verifier() <- nfs_lookup_revalidate_delegated() <- nfs{,4}_do_lookup_revalidate(). We have checked that the parent had been positive, but that's done before we get to nfs_set_verifier() and it's possible for memory pressure to pick our dentry as eviction candidate by that time. If that happens, back-to-back attempts to kill dentry and its parent are quite normal. Sure, in case of eviction we'll fail the ->d_seq check in the caller, but we need to survive until we return there... Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c8ecbe9990596..ac505671efbdb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1431,9 +1431,9 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); - struct inode *dir = d_inode(dentry->d_parent); + struct inode *dir = d_inode_rcu(dentry->d_parent); - if (!nfs_verify_change_attribute(dir, verf)) + if (!dir || !nfs_verify_change_attribute(dir, verf)) return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) nfs_set_verifier_delegated(&verf); -- GitLab From c1b967d03c5d570ed7b90a88031fa2af34bf5b20 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2023 22:11:26 -0400 Subject: [PATCH 1398/1405] nfs: fix UAF on pathwalk running into umount NFS ->d_revalidate(), ->permission() and ->get_link() need to access some parts of nfs_server when called in RCU mode: server->flags server->caps *(server->io_stats) and, worst of all, call server->nfs_client->rpc_ops->have_delegation (the last one - as NFS_PROTO(inode)->have_delegation()). We really don't want to RCU-delay the entire nfs_free_server() (it would have to be done with schedule_work() from RCU callback, since it can't be made to run from interrupt context), but actual freeing of nfs_server and ->io_stats can be done via call_rcu() just fine. nfs_client part is handled simply by making nfs_free_client() use kfree_rcu(). Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/nfs/client.c | 13 ++++++++++--- include/linux/nfs_fs_sb.h | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 44eca51b28085..fbdc9ca80f714 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -246,7 +246,7 @@ void nfs_free_client(struct nfs_client *clp) put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); kfree(clp->cl_acceptor); - kfree(clp); + kfree_rcu(clp, rcu); } EXPORT_SYMBOL_GPL(nfs_free_client); @@ -1006,6 +1006,14 @@ struct nfs_server *nfs_alloc_server(void) } EXPORT_SYMBOL_GPL(nfs_alloc_server); +static void delayed_free(struct rcu_head *p) +{ + struct nfs_server *server = container_of(p, struct nfs_server, rcu); + + nfs_free_iostats(server->io_stats); + kfree(server); +} + /* * Free up a server record */ @@ -1031,10 +1039,9 @@ void nfs_free_server(struct nfs_server *server) ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); - nfs_free_iostats(server->io_stats); put_cred(server->cred); - kfree(server); nfs_release_automount_timer(); + call_rcu(&server->rcu, delayed_free); } EXPORT_SYMBOL_GPL(nfs_free_server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index cd797e00fe359..92de074e63b98 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -124,6 +124,7 @@ struct nfs_client { char cl_ipaddr[48]; struct net *cl_net; struct list_head pending_cb_stateids; + struct rcu_head rcu; }; /* @@ -265,6 +266,7 @@ struct nfs_server { const struct cred *cred; bool has_sec_mnt_opts; struct kobject kobj; + struct rcu_head rcu; }; /* Server capabilities */ -- GitLab From 47458802f6606f652cd0f6dc38cd52ce60ec0145 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2023 23:52:58 -0400 Subject: [PATCH 1399/1405] procfs: move dropping pde and pid from ->evict_inode() to ->free_inode() that keeps both around until struct inode is freed, making access to them safe from rcu-pathwalk Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/proc/base.c | 2 -- fs/proc/inode.c | 19 ++++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 98a031ac26484..18550c071d71c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1878,8 +1878,6 @@ void proc_pid_evict_inode(struct proc_inode *ei) hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(&pid->lock); } - - put_pid(pid); } struct inode *proc_pid_make_inode(struct super_block *sb, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b33e490e3fd9f..05350f3c2812c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -30,7 +30,6 @@ static void proc_evict_inode(struct inode *inode) { - struct proc_dir_entry *de; struct ctl_table_header *head; struct proc_inode *ei = PROC_I(inode); @@ -38,17 +37,8 @@ static void proc_evict_inode(struct inode *inode) clear_inode(inode); /* Stop tracking associated processes */ - if (ei->pid) { + if (ei->pid) proc_pid_evict_inode(ei); - ei->pid = NULL; - } - - /* Let go of any associated proc directory entry */ - de = ei->pde; - if (de) { - pde_put(de); - ei->pde = NULL; - } head = ei->sysctl; if (head) { @@ -80,6 +70,13 @@ static struct inode *proc_alloc_inode(struct super_block *sb) static void proc_free_inode(struct inode *inode) { + struct proc_inode *ei = PROC_I(inode); + + if (ei->pid) + put_pid(ei->pid); + /* Let go of any associated proc directory entry */ + if (ei->pde) + pde_put(ei->pde); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -- GitLab From e31f0a57ae1ab2f6e17adb8e602bc120ad722232 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Sep 2023 00:12:00 -0400 Subject: [PATCH 1400/1405] procfs: make freeing proc_fs_info rcu-delayed makes proc_pid_ns() safe from rcu pathwalk (put_pid_ns() is still synchronous, but that's not a problem - it does rcu-delay everything that needs to be) Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/proc/root.c | 2 +- include/linux/proc_fs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/root.c b/fs/proc/root.c index b55dbc70287b4..06a297a27ba3b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -271,7 +271,7 @@ static void proc_kill_sb(struct super_block *sb) kill_anon_super(sb); put_pid_ns(fs_info->pid_ns); - kfree(fs_info); + kfree_rcu(fs_info, rcu); } static struct file_system_type proc_fs_type = { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index de407e7c3b55f..0b2a898544409 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -65,6 +65,7 @@ struct proc_fs_info { kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; + struct rcu_head rcu; }; static inline struct proc_fs_info *proc_sb_info(struct super_block *sb) -- GitLab From 053fc4f755ad43cf35210677bcba798ccdc48d0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2023 00:19:39 -0400 Subject: [PATCH 1401/1405] fuse: fix UAF in rcu pathwalks ->permission(), ->get_link() and ->inode_get_acl() might dereference ->s_fs_info (and, in case of ->permission(), ->s_fs_info->fc->user_ns as well) when called from rcu pathwalk. Freeing ->s_fs_info->fc is rcu-delayed; we need to make freeing ->s_fs_info and dropping ->user_ns rcu-delayed too. Signed-off-by: Al Viro --- fs/fuse/cuse.c | 3 +-- fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 15 +++++++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 91e89e68177ee..b6cad106c37e4 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -474,8 +474,7 @@ static int cuse_send_init(struct cuse_conn *cc) static void cuse_fc_release(struct fuse_conn *fc) { - struct cuse_conn *cc = fc_to_cc(fc); - kfree_rcu(cc, fc.rcu); + kfree(fc_to_cc(fc)); } /** diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1df83eebda927..bcbe344888627 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -888,6 +888,7 @@ struct fuse_mount { /* Entry on fc->mounts */ struct list_head fc_entry; + struct rcu_head rcu; }; static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2a6d44f91729b..516ea2979a90f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -930,6 +930,14 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, } EXPORT_SYMBOL_GPL(fuse_conn_init); +static void delayed_release(struct rcu_head *p) +{ + struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); + + put_user_ns(fc->user_ns); + fc->release(fc); +} + void fuse_conn_put(struct fuse_conn *fc) { if (refcount_dec_and_test(&fc->count)) { @@ -941,13 +949,12 @@ void fuse_conn_put(struct fuse_conn *fc) if (fiq->ops->release) fiq->ops->release(fiq); put_pid_ns(fc->pid_ns); - put_user_ns(fc->user_ns); bucket = rcu_dereference_protected(fc->curr_bucket, 1); if (bucket) { WARN_ON(atomic_read(&bucket->count) != 1); kfree(bucket); } - fc->release(fc); + call_rcu(&fc->rcu, delayed_release); } } EXPORT_SYMBOL_GPL(fuse_conn_put); @@ -1366,7 +1373,7 @@ EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { WARN_ON(!list_empty(&fc->devices)); - kfree_rcu(fc, rcu); + kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); @@ -1902,7 +1909,7 @@ static void fuse_sb_destroy(struct super_block *sb) void fuse_mount_destroy(struct fuse_mount *fm) { fuse_conn_put(fm->fc); - kfree(fm); + kfree_rcu(fm, rcu); } EXPORT_SYMBOL(fuse_mount_destroy); -- GitLab From 0511fdb4a378183ca18a9678d3d9044c8ec592c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 19 Sep 2023 22:28:16 -0400 Subject: [PATCH 1402/1405] cifs_get_link(): bail out in unsafe case ->d_revalidate() bails out there, anyway. It's not enough to prevent getting into ->get_link() in RCU mode, but that could happen only in a very contrieved setup. Not worth trying to do anything fancy here unless ->d_revalidate() stops kicking out of RCU mode at least in some cases. Reviewed-by: Christian Brauner Acked-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/smb/client/cifsfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index e902de4e475af..630e74628dfed 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1172,6 +1172,9 @@ const char *cifs_get_link(struct dentry *dentry, struct inode *inode, { char *target_path; + if (!dentry) + return ERR_PTR(-ECHILD); + target_path = kmalloc(PATH_MAX, GFP_KERNEL); if (!target_path) return ERR_PTR(-ENOMEM); -- GitLab From 9fa8e282c2bfe93338e81a620a49f5903a745231 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Feb 2024 01:17:34 -0500 Subject: [PATCH 1403/1405] ext4_get_link(): fix breakage in RCU mode 1) errors from ext4_getblk() should not be propagated to caller unless we are really sure that we would've gotten the same error in non-RCU pathwalk. 2) we leak buffer_heads if ext4_getblk() is successful, but bh is not uptodate. Signed-off-by: Al Viro --- fs/ext4/symlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 75bf1f88843c4..645240cc0229f 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -92,10 +92,12 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, if (!dentry) { bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); - if (IS_ERR(bh)) - return ERR_CAST(bh); - if (!bh || !ext4_buffer_uptodate(bh)) + if (IS_ERR(bh) || !bh) return ERR_PTR(-ECHILD); + if (!ext4_buffer_uptodate(bh)) { + brelse(bh); + return ERR_PTR(-ECHILD); + } } else { bh = ext4_bread(NULL, inode, 0, 0); if (IS_ERR(bh)) -- GitLab From 5197728f8182a93a07e5bf860726456322d3a908 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 25 Feb 2024 15:45:34 -0500 Subject: [PATCH 1404/1405] bcachefs: fix bch2_save_backtrace() Missed a call in the previous fix. Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 231003b405efc..3a32faa86b5c4 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -289,7 +289,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne do { nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && - !(ret = darray_make_room(stack, stack->size * 2))); + !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); stack->nr = nr_entries; up_read(&task->signal->exec_update_lock); -- GitLab From d206a76d7d2726f3b096037f2079ce0bd3ba329b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Feb 2024 15:46:06 -0800 Subject: [PATCH 1405/1405] Linux 6.8-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 41fa8a2565f54..6cdb5717bfe05 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- GitLab