Commit eb1d7a65 authored by Yafang Shao's avatar Yafang Shao Committed by Linus Torvalds
Browse files

mm, fadvise: improve the expensive remote LRU cache draining after FADV_DONTNEED

Our users reported that there're some random latency spikes when their RT
process is running.  Finally we found that latency spike is caused by
FADV_DONTNEED.  Which may call lru_add_drain_all() to drain LRU cache on
remote CPUs, and then waits the per-cpu work to complete.  The wait time
is uncertain, which may be tens millisecond.

That behavior is unreasonable, because this process is bound to a specific
CPU and the file is only accessed by itself, IOW, there should be no
pagecache pages on a per-cpu pagevec of a remote CPU.  That unreasonable
behavior is partially caused by the wrong comparation of the number of
invalidated pages and the number of the target.  For example,

        if (count < (end_index - start_index + 1))

The count above is how many pages were invalidated in the local CPU, and
(end_index - start_index + 1) is how many pages should be invalidated.
The usage of (end_index - start_index + 1) is incorrect,...
Showing with 49 additions and 22 deletions
+49 -22
......@@ -2581,6 +2581,10 @@ extern bool is_bad_inode(struct inode *);
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
void invalidate_mapping_pagevec(struct address_space *mapping,
pgoff_t start, pgoff_t end,
unsigned long *nr_pagevec);
static inline void invalidate_remote_inode(struct inode *inode)
{
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
......
......@@ -141,7 +141,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
}
if (end_index >= start_index) {
unsigned long count;
unsigned long nr_pagevec = 0;
/*
* It's common to FADV_DONTNEED right after
......@@ -154,8 +154,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
*/
lru_add_drain();
count = invalidate_mapping_pages(mapping,
start_index, end_index);
invalidate_mapping_pagevec(mapping,
start_index, end_index,
&nr_pagevec);
/*
* If fewer pages were invalidated than expected then
......@@ -163,7 +164,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
* a per-cpu pagevec for a remote CPU. Drain all
* pagevecs and try again.
*/
if (count < (end_index - start_index + 1)) {
if (nr_pagevec) {
lru_add_drain_all();
invalidate_mapping_pages(mapping, start_index,
end_index);
......
......@@ -528,23 +528,8 @@ void truncate_inode_pages_final(struct address_space *mapping)
}
EXPORT_SYMBOL(truncate_inode_pages_final);
/**
* invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
* @mapping: the address_space which holds the pages to invalidate
* @start: the offset 'from' which to invalidate
* @end: the offset 'to' which to invalidate (inclusive)
*
* This function only removes the unlocked pages, if you want to
* remove all the pages of one inode, you must call truncate_inode_pages.
*
* invalidate_mapping_pages() will not block on IO activity. It will not
* invalidate pages which are dirty, locked, under writeback or mapped into
* pagetables.
*
* Return: the number of the pages that were invalidated
*/
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
unsigned long __invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
pgoff_t indices[PAGEVEC_SIZE];
struct pagevec pvec;
......@@ -610,8 +595,13 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
* Invalidation is a hint that the page is no longer
* of interest and try to speed up its reclaim.
*/
if (!ret)
if (!ret) {
deactivate_file_page(page);
/* It is likely on the pagevec of a remote CPU */
if (nr_pagevec)
(*nr_pagevec)++;
}
if (PageTransHuge(page))
put_page(page);
count += ret;
......@@ -623,8 +613,40 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
}
return count;
}
/**
* invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
* @mapping: the address_space which holds the pages to invalidate
* @start: the offset 'from' which to invalidate
* @end: the offset 'to' which to invalidate (inclusive)
*
* This function only removes the unlocked pages, if you want to
* remove all the pages of one inode, you must call truncate_inode_pages.
*
* invalidate_mapping_pages() will not block on IO activity. It will not
* invalidate pages which are dirty, locked, under writeback or mapped into
* pagetables.
*
* Return: the number of the pages that were invalidated
*/
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
return __invalidate_mapping_pages(mapping, start, end, NULL);
}
EXPORT_SYMBOL(invalidate_mapping_pages);
/**
* This helper is similar with the above one, except that it accounts for pages
* that are likely on a pagevec and count them in @nr_pagevec, which will used by
* the caller.
*/
void invalidate_mapping_pagevec(struct address_space *mapping,
pgoff_t start, pgoff_t end, unsigned long *nr_pagevec)
{
__invalidate_mapping_pages(mapping, start, end, nr_pagevec);
}
/*
* This is like invalidate_complete_page(), except it ignores the page's
* refcount. We do this because invalidate_inode_pages2() needs stronger
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment