author     royger <royger@FreeBSD.org>  2018-01-24 00:23:57 +0800
committer  royger <royger@FreeBSD.org>  2018-01-24 00:23:57 +0800
commit     eb65a9021abbcb4b1ee08b2926724a3caa227a59 (patch)
tree       f089cc1da5298a01dc1290f561b5d38a7cba6b97
parent     33ece822bf3b7326a7f0f3adafe553e523447a27 (diff)
xen-kernel: fix build with clang 6 and apply pending XSA patches
This includes a band-aid for running 64bit PV guests without compromising the whole system.

MFH:           2018Q1
Sponsored by:  Citrix Systems R&D
-rw-r--r--  emulators/xen-kernel/files/0001-p2m-Always-check-to-see-if-removing-a-p2m-entry-actu.patch   | 176
-rw-r--r--  emulators/xen-kernel/files/0001-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch  | 756
-rw-r--r--  emulators/xen-kernel/files/0001-x86-compat-fix-compilation-errors-with-clang-6.patch        |  76
-rw-r--r--  emulators/xen-kernel/files/0002-p2m-Check-return-value-of-p2m_set_entry-when-decreas.patch  | 109
-rw-r--r--  emulators/xen-kernel/files/0002-x86-allow-Meltdown-band-aid-to-be-disabled.patch            | 163
-rw-r--r--  emulators/xen-kernel/files/xsa246-4.7.patch                                                 |  74
-rw-r--r--  emulators/xen-kernel/files/xsa248-4.8.patch                                                 | 162
-rw-r--r--  emulators/xen-kernel/files/xsa249.patch                                                     |  42
-rw-r--r--  emulators/xen-kernel/files/xsa250.patch                                                     |  67
-rw-r--r--  emulators/xen-kernel/files/xsa251-4.8.patch                                                 |  21
10 files changed, 1646 insertions(+), 0 deletions(-)
diff --git a/emulators/xen-kernel/files/0001-p2m-Always-check-to-see-if-removing-a-p2m-entry-actu.patch b/emulators/xen-kernel/files/0001-p2m-Always-check-to-see-if-removing-a-p2m-entry-actu.patch
new file mode 100644
index 000000000000..bba280c92641
--- /dev/null
+++ b/emulators/xen-kernel/files/0001-p2m-Always-check-to-see-if-removing-a-p2m-entry-actu.patch
@@ -0,0 +1,176 @@
+From f345ca185e0c042ed12bf929a9e93efaf33397bb Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Fri, 10 Nov 2017 16:53:54 +0000
+Subject: [PATCH 1/2] p2m: Always check to see if removing a p2m entry actually
+ worked
+
+The PoD zero-check functions speculatively remove memory from the p2m,
+then check to see if it's completely zeroed, before putting it in the
+cache.
+
+Unfortunately, the p2m_set_entry() calls may fail if the underlying
+pagetable structure needs to change and the domain has exhausted its
+p2m memory pool: for instance, if we're removing a 2MiB region out of
+a 1GiB entry (in the p2m_pod_zero_check_superpage() case), or a 4k
+region out of a 2MiB or larger entry (in the p2m_pod_zero_check()
+case); and the return value is not checked.
+
+The underlying mfn will then be added into the PoD cache, and at some
+point mapped into another location in the p2m. If the guest
+afterwards balloons out this memory, it will be freed to the hypervisor
+and potentially reused by another domain, in spite of the fact that
+the original domain still has writable mappings to it.
+
+There are several places where p2m_set_entry() shouldn't be able to
+fail, as it is guaranteed to write an entry of the same order that
+succeeded before. Add a backstop of crashing the domain just in case,
+and an ASSERT_UNREACHABLE() to flag up the broken assumption on debug
+builds.
+
+While we're here, use PAGE_ORDER_2M rather than a magic constant.
+
+This is part of XSA-247.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+v4:
+- Removed some trailing whitespace
+v3:
+- Reformat reset clause to be more compact
+- Make sure to set map[i] = NULL when unmapping in case we need to bail
+v2:
+- Crash a domain if a p2m_set_entry we think cannot fail fails anyway.
+---
+ xen/arch/x86/mm/p2m-pod.c | 77 +++++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 61 insertions(+), 16 deletions(-)
+
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index 87082cf65f..5ec8a37949 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -754,8 +754,10 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
+ }
+
+ /* Try to remove the page, restoring old mapping if it fails. */
+- p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M,
+- p2m_populate_on_demand, p2m->default_access);
++ if ( p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M,
++ p2m_populate_on_demand, p2m->default_access) )
++ goto out;
++
+ p2m_tlb_flush_sync(p2m);
+
+ /* Make none of the MFNs are used elsewhere... for example, mapped
+@@ -812,9 +814,18 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn)
+ ret = SUPERPAGE_PAGES;
+
+ out_reset:
+- if ( reset )
+- p2m_set_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access);
+-
++ /*
++ * This p2m_set_entry() call shouldn't be able to fail, since the same order
++ * on the same gfn succeeded above. If that turns out to be false, crashing
++ * the domain should be the safest way of making sure we don't leak memory.
++ */
++ if ( reset && p2m_set_entry(p2m, gfn, mfn0, PAGE_ORDER_2M,
++ type0, p2m->default_access) )
++ {
++ ASSERT_UNREACHABLE();
++ domain_crash(d);
++ }
++
+ out:
+ gfn_unlock(p2m, gfn, SUPERPAGE_ORDER);
+ return ret;
+@@ -871,19 +882,30 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
+ }
+
+ /* Try to remove the page, restoring old mapping if it fails. */
+- p2m_set_entry(p2m, gfns[i], _mfn(INVALID_MFN), PAGE_ORDER_4K,
+- p2m_populate_on_demand, p2m->default_access);
++ if ( p2m_set_entry(p2m, gfns[i], _mfn(INVALID_MFN), PAGE_ORDER_4K,
++ p2m_populate_on_demand, p2m->default_access) )
++ goto skip;
+
+ /* See if the page was successfully unmapped. (Allow one refcount
+ * for being allocated to a domain.) */
+ if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
+ {
++ /*
++ * If the previous p2m_set_entry call succeeded, this one shouldn't
++ * be able to fail. If it does, crashing the domain should be safe.
++ */
++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
++ types[i], p2m->default_access) )
++ {
++ ASSERT_UNREACHABLE();
++ domain_crash(d);
++ goto out_unmap;
++ }
++
++ skip:
+ unmap_domain_page(map[i]);
+ map[i] = NULL;
+
+- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
+- types[i], p2m->default_access);
+-
+ continue;
+ }
+ }
+@@ -902,12 +924,25 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
+
+ unmap_domain_page(map[i]);
+
+- /* See comment in p2m_pod_zero_check_superpage() re gnttab
+- * check timing. */
+- if ( j < PAGE_SIZE/sizeof(*map[i]) )
++ map[i] = NULL;
++
++ /*
++ * See comment in p2m_pod_zero_check_superpage() re gnttab
++ * check timing.
++ */
++ if ( j < (PAGE_SIZE / sizeof(*map[i])) )
+ {
+- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
+- types[i], p2m->default_access);
++ /*
++ * If the previous p2m_set_entry call succeeded, this one shouldn't
++ * be able to fail. If it does, crashing the domain should be safe.
++ */
++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K,
++ types[i], p2m->default_access) )
++ {
++ ASSERT_UNREACHABLE();
++ domain_crash(d);
++ goto out_unmap;
++ }
+ }
+ else
+ {
+@@ -931,7 +966,17 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count)
+ p2m->pod.entry_count++;
+ }
+ }
+-
++
++ return;
++
++out_unmap:
++ /*
++ * Something went wrong, probably crashing the domain. Unmap
++ * everything and return.
++ */
++ for ( i = 0; i < count; i++ )
++ if ( map[i] )
++ unmap_domain_page(map[i]);
+ }
+
+ #define POD_SWEEP_LIMIT 1024
+--
+2.15.0
+
diff --git a/emulators/xen-kernel/files/0001-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch b/emulators/xen-kernel/files/0001-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch
new file mode 100644
index 000000000000..97c93b30e1e2
--- /dev/null
+++ b/emulators/xen-kernel/files/0001-x86-Meltdown-band-aid-against-malicious-64-bit-PV-gu.patch
@@ -0,0 +1,756 @@
+From e19517a3355acaaa2ff83018bc41e7fd044161e5 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 17 Jan 2018 17:24:12 +0100
+Subject: [PATCH 1/2] x86: Meltdown band-aid against malicious 64-bit PV guests
+
+This is a very simplistic change limiting the amount of memory a running
+64-bit PV guest has mapped (and hence available for attacking): Only the
+mappings of stack, IDT, and TSS are being cloned from the direct map
+into per-CPU page tables. Guest controlled parts of the page tables are
+being copied into those per-CPU page tables upon entry into the guest.
+Cross-vCPU synchronization of top level page table entry changes is
+being effected by forcing other active vCPU-s of the guest into the
+hypervisor.
+
+The change to context_switch() isn't strictly necessary, but there's no
+reason to keep switching page tables once a PV guest is being scheduled
+out.
+
+This isn't providing full isolation yet, but it should be covering all
+pieces of information exposure of which would otherwise require an XSA.
+
+There is certainly much room for improvement, especially of performance,
+here - first and foremost suppressing all the negative effects on AMD
+systems. But in the interest of backportability (including to really old
+hypervisors, which may not even have alternative patching) any such is
+being left out here.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: 5784de3e2067ed73efc2fe42e62831e8ae7f46c4
+master date: 2018-01-16 17:49:03 +0100
+---
+ xen/arch/x86/domain.c | 5 +
+ xen/arch/x86/mm.c | 17 ++++
+ xen/arch/x86/smpboot.c | 198 +++++++++++++++++++++++++++++++++++++
+ xen/arch/x86/x86_64/asm-offsets.c | 2 +
+ xen/arch/x86/x86_64/compat/entry.S | 11 +++
+ xen/arch/x86/x86_64/entry.S | 149 +++++++++++++++++++++++++++-
+ xen/include/asm-x86/asm_defns.h | 30 ++++++
+ xen/include/asm-x86/current.h | 12 +++
+ xen/include/asm-x86/processor.h | 1 +
+ xen/include/asm-x86/x86_64/page.h | 5 +-
+ 10 files changed, 424 insertions(+), 6 deletions(-)
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 6539b75fa7..3cf18f95b7 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1949,6 +1949,9 @@ static void paravirt_ctxt_switch_to(struct vcpu *v)
+
+ switch_kernel_stack(v);
+
++ this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
++ l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++
+ cr4 = pv_guest_cr4_to_real_cr4(v);
+ if ( unlikely(cr4 != read_cr4()) )
+ write_cr4(cr4);
+@@ -2096,6 +2099,8 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
+
+ ASSERT(local_irq_is_enabled());
+
++ get_cpu_info()->xen_cr3 = 0;
++
+ cpumask_copy(&dirty_mask, next->vcpu_dirty_cpumask);
+ /* Allow at most one CPU at a time to be dirty. */
+ ASSERT(cpumask_weight(&dirty_mask) <= 1);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 50f500c940..c9e4003989 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3857,6 +3857,7 @@ long do_mmu_update(
+ struct vcpu *curr = current, *v = curr;
+ struct domain *d = v->domain, *pt_owner = d, *pg_owner;
+ struct domain_mmap_cache mapcache;
++ bool_t sync_guest = 0;
+ uint32_t xsm_needed = 0;
+ uint32_t xsm_checked = 0;
+ int rc = put_old_guest_table(curr);
+@@ -4005,6 +4006,8 @@ long do_mmu_update(
+ case PGT_l4_page_table:
+ rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
++ if ( !rc )
++ sync_guest = 1;
+ break;
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+@@ -4107,6 +4110,20 @@ long do_mmu_update(
+
+ domain_mmap_cache_destroy(&mapcache);
+
++ if ( sync_guest )
++ {
++ /*
++ * Force other vCPU-s of the affected guest to pick up L4 entry
++ * changes (if any). Issue a flush IPI with empty operation mask to
++ * facilitate this (including ourselves waiting for the IPI to
++ * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
++ * meaningless without FLUSH_CACHE, but will allow to pass the no-op
++ * check in flush_area_mask().
++ */
++ flush_area_mask(pt_owner->domain_dirty_cpumask,
++ ZERO_BLOCK_PTR, FLUSH_VA_VALID);
++ }
++
+ perfc_add(num_page_updates, i);
+
+ out:
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index f9e4ee85ff..eaeec5acf0 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -319,6 +319,9 @@ void start_secondary(void *unused)
+ */
+ spin_debug_disable();
+
++ get_cpu_info()->xen_cr3 = 0;
++ get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++
+ load_system_tables();
+
+ /* Full exception support from here on in. */
+@@ -628,6 +631,187 @@ void cpu_exit_clear(unsigned int cpu)
+ set_cpu_state(CPU_STATE_DEAD);
+ }
+
++static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
++{
++ unsigned long linear = (unsigned long)ptr, pfn;
++ unsigned int flags;
++ l3_pgentry_t *pl3e = l4e_to_l3e(idle_pg_table[root_table_offset(linear)]) +
++ l3_table_offset(linear);
++ l2_pgentry_t *pl2e;
++ l1_pgentry_t *pl1e;
++
++ if ( linear < DIRECTMAP_VIRT_START )
++ return 0;
++
++ flags = l3e_get_flags(*pl3e);
++ ASSERT(flags & _PAGE_PRESENT);
++ if ( flags & _PAGE_PSE )
++ {
++ pfn = (l3e_get_pfn(*pl3e) & ~((1UL << (2 * PAGETABLE_ORDER)) - 1)) |
++ (PFN_DOWN(linear) & ((1UL << (2 * PAGETABLE_ORDER)) - 1));
++ flags &= ~_PAGE_PSE;
++ }
++ else
++ {
++ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(linear);
++ flags = l2e_get_flags(*pl2e);
++ ASSERT(flags & _PAGE_PRESENT);
++ if ( flags & _PAGE_PSE )
++ {
++ pfn = (l2e_get_pfn(*pl2e) & ~((1UL << PAGETABLE_ORDER) - 1)) |
++ (PFN_DOWN(linear) & ((1UL << PAGETABLE_ORDER) - 1));
++ flags &= ~_PAGE_PSE;
++ }
++ else
++ {
++ pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(linear);
++ flags = l1e_get_flags(*pl1e);
++ if ( !(flags & _PAGE_PRESENT) )
++ return 0;
++ pfn = l1e_get_pfn(*pl1e);
++ }
++ }
++
++ if ( !(root_get_flags(rpt[root_table_offset(linear)]) & _PAGE_PRESENT) )
++ {
++ pl3e = alloc_xen_pagetable();
++ if ( !pl3e )
++ return -ENOMEM;
++ clear_page(pl3e);
++ l4e_write(&rpt[root_table_offset(linear)],
++ l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
++ }
++ else
++ pl3e = l4e_to_l3e(rpt[root_table_offset(linear)]);
++
++ pl3e += l3_table_offset(linear);
++
++ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
++ {
++ pl2e = alloc_xen_pagetable();
++ if ( !pl2e )
++ return -ENOMEM;
++ clear_page(pl2e);
++ l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
++ }
++ else
++ {
++ ASSERT(!(l3e_get_flags(*pl3e) & _PAGE_PSE));
++ pl2e = l3e_to_l2e(*pl3e);
++ }
++
++ pl2e += l2_table_offset(linear);
++
++ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
++ {
++ pl1e = alloc_xen_pagetable();
++ if ( !pl1e )
++ return -ENOMEM;
++ clear_page(pl1e);
++ l2e_write(pl2e, l2e_from_paddr(__pa(pl1e), __PAGE_HYPERVISOR));
++ }
++ else
++ {
++ ASSERT(!(l2e_get_flags(*pl2e) & _PAGE_PSE));
++ pl1e = l2e_to_l1e(*pl2e);
++ }
++
++ pl1e += l1_table_offset(linear);
++
++ if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
++ {
++ ASSERT(l1e_get_pfn(*pl1e) == pfn);
++ ASSERT(l1e_get_flags(*pl1e) == flags);
++ }
++ else
++ l1e_write(pl1e, l1e_from_pfn(pfn, flags));
++
++ return 0;
++}
++
++DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
++
++static int setup_cpu_root_pgt(unsigned int cpu)
++{
++ root_pgentry_t *rpt = alloc_xen_pagetable();
++ unsigned int off;
++ int rc;
++
++ if ( !rpt )
++ return -ENOMEM;
++
++ clear_page(rpt);
++ per_cpu(root_pgt, cpu) = rpt;
++
++ rpt[root_table_offset(RO_MPT_VIRT_START)] =
++ idle_pg_table[root_table_offset(RO_MPT_VIRT_START)];
++ /* SH_LINEAR_PT inserted together with guest mappings. */
++ /* PERDOMAIN inserted during context switch. */
++ rpt[root_table_offset(XEN_VIRT_START)] =
++ idle_pg_table[root_table_offset(XEN_VIRT_START)];
++
++ /* Install direct map page table entries for stack, IDT, and TSS. */
++ for ( off = rc = 0; !rc && off < STACK_SIZE; off += PAGE_SIZE )
++ rc = clone_mapping(__va(__pa(stack_base[cpu])) + off, rpt);
++
++ if ( !rc )
++ rc = clone_mapping(idt_tables[cpu], rpt);
++ if ( !rc )
++ rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
++
++ return rc;
++}
++
++static void cleanup_cpu_root_pgt(unsigned int cpu)
++{
++ root_pgentry_t *rpt = per_cpu(root_pgt, cpu);
++ unsigned int r;
++
++ if ( !rpt )
++ return;
++
++ per_cpu(root_pgt, cpu) = NULL;
++
++ for ( r = root_table_offset(DIRECTMAP_VIRT_START);
++ r < root_table_offset(HYPERVISOR_VIRT_END); ++r )
++ {
++ l3_pgentry_t *l3t;
++ unsigned int i3;
++
++ if ( !(root_get_flags(rpt[r]) & _PAGE_PRESENT) )
++ continue;
++
++ l3t = l4e_to_l3e(rpt[r]);
++
++ for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; ++i3 )
++ {
++ l2_pgentry_t *l2t;
++ unsigned int i2;
++
++ if ( !(l3e_get_flags(l3t[i3]) & _PAGE_PRESENT) )
++ continue;
++
++ ASSERT(!(l3e_get_flags(l3t[i3]) & _PAGE_PSE));
++ l2t = l3e_to_l2e(l3t[i3]);
++
++ for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; ++i2 )
++ {
++ if ( !(l2e_get_flags(l2t[i2]) & _PAGE_PRESENT) )
++ continue;
++
++ ASSERT(!(l2e_get_flags(l2t[i2]) & _PAGE_PSE));
++ free_xen_pagetable(l2e_to_l1e(l2t[i2]));
++ }
++
++ free_xen_pagetable(l2t);
++ }
++
++ free_xen_pagetable(l3t);
++ }
++
++ free_xen_pagetable(rpt);
++}
++
+ static void cpu_smpboot_free(unsigned int cpu)
+ {
+ unsigned int order, socket = cpu_to_socket(cpu);
+@@ -664,6 +848,8 @@ static void cpu_smpboot_free(unsigned int cpu)
+ free_domheap_page(mfn_to_page(mfn));
+ }
+
++ cleanup_cpu_root_pgt(cpu);
++
+ order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+ free_xenheap_pages(per_cpu(gdt_table, cpu), order);
+
+@@ -719,6 +905,9 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+ set_ist(&idt_tables[cpu][TRAP_nmi], IST_NONE);
+ set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
+
++ if ( setup_cpu_root_pgt(cpu) )
++ goto oom;
++
+ for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
+ i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
+ if ( cpu_online(i) && cpu_to_node(i) == node )
+@@ -773,6 +962,8 @@ static struct notifier_block cpu_smpboot_nfb = {
+
+ void __init smp_prepare_cpus(unsigned int max_cpus)
+ {
++ int rc;
++
+ register_cpu_notifier(&cpu_smpboot_nfb);
+
+ mtrr_aps_sync_begin();
+@@ -786,6 +977,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+
+ stack_base[0] = stack_start;
+
++ rc = setup_cpu_root_pgt(0);
++ if ( rc )
++ panic("Error %d setting up PV root page table\n", rc);
++ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++
+ set_nr_sockets();
+
+ socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
+@@ -850,6 +1046,8 @@ void __init smp_prepare_boot_cpu(void)
+ {
+ cpumask_set_cpu(smp_processor_id(), &cpu_online_map);
+ cpumask_set_cpu(smp_processor_id(), &cpu_present_map);
++
++ get_cpu_info()->xen_cr3 = 0;
+ }
+
+ static void
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index a3ae7a475f..4f2ba28520 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -137,6 +137,8 @@ void __dummy__(void)
+ OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
+ OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+ OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
++ OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
++ OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
+ DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+ BLANK();
+
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 7ee01597a3..f7e53fb3cb 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -270,6 +270,17 @@ ENTRY(cstar_enter)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lcstar_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lcstar_cr3_okay:
++
+ GET_CURRENT(bx)
+ movq VCPU_domain(%rbx),%rcx
+ cmpb $0,DOMAIN_is_32bit_pv(%rcx)
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index cebb1e4f4f..d63e734bb3 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -36,6 +36,32 @@ ENTRY(switch_to_kernel)
+ /* %rbx: struct vcpu, interrupts disabled */
+ restore_all_guest:
+ ASSERT_INTERRUPTS_DISABLED
++
++ /* Copy guest mappings and switch to per-CPU root page table. */
++ mov %cr3, %r9
++ GET_STACK_END(dx)
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
++ movabs $PADDR_MASK & PAGE_MASK, %rsi
++ movabs $DIRECTMAP_VIRT_START, %rcx
++ mov %rdi, %rax
++ and %rsi, %rdi
++ and %r9, %rsi
++ add %rcx, %rdi
++ add %rcx, %rsi
++ mov $ROOT_PAGETABLE_FIRST_XEN_SLOT, %ecx
++ mov root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rsi), %r8
++ mov %r8, root_table_offset(SH_LINEAR_PT_VIRT_START)*8(%rdi)
++ rep movsq
++ mov $ROOT_PAGETABLE_ENTRIES - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1, %ecx
++ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rsi
++ sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
++ ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
++ rep movsq
++ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
++ write_cr3 rax, rdi, rsi
++
+ RESTORE_ALL
+ testw $TRAP_syscall,4(%rsp)
+ jz iret_exit_to_guest
+@@ -70,6 +96,22 @@ iret_exit_to_guest:
+ ALIGN
+ /* No special register assumptions. */
+ restore_all_xen:
++ /*
++ * Check whether we need to switch to the per-CPU page tables, in
++ * case we return to late PV exit code (from an NMI or #MC).
++ */
++ GET_STACK_END(ax)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rax), %rdx
++ mov STACK_CPUINFO_FIELD(pv_cr3)(%rax), %rax
++ test %rdx, %rdx
++ /*
++ * Ideally the condition would be "nsz", but such doesn't exist,
++ * so "g" will have to do.
++ */
++UNLIKELY_START(g, exit_cr3)
++ write_cr3 rax, rdi, rsi
++UNLIKELY_END(exit_cr3)
++
+ RESTORE_ALL adj=8
+ iretq
+
+@@ -99,7 +141,18 @@ ENTRY(lstar_enter)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
+- GET_CURRENT(bx)
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Llstar_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, r11, r12
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Llstar_cr3_okay:
++
++ __GET_CURRENT(bx)
+ testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
+ jz switch_to_kernel
+
+@@ -248,7 +301,18 @@ GLOBAL(sysenter_eflags_saved)
+ pushq $0
+ movl $TRAP_syscall, 4(%rsp)
+ SAVE_ALL
+- GET_CURRENT(bx)
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lsyse_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lsyse_cr3_okay:
++
++ __GET_CURRENT(bx)
+ cmpb $0,VCPU_sysenter_disables_events(%rbx)
+ movq VCPU_sysenter_addr(%rbx),%rax
+ setne %cl
+@@ -284,13 +348,23 @@ ENTRY(int80_direct_trap)
+ movl $0x80, 4(%rsp)
+ SAVE_ALL
+
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
++ neg %rcx
++ jz .Lint80_cr3_okay
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++ neg %rcx
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
++.Lint80_cr3_okay:
++
+ cmpb $0,untrusted_msi(%rip)
+ UNLIKELY_START(ne, msi_check)
+ movl $0x80,%edi
+ call check_for_unexpected_msi
+ UNLIKELY_END(msi_check)
+
+- GET_CURRENT(bx)
++ __GET_CURRENT(bx)
+
+ /* Check that the callback is non-null. */
+ leaq VCPU_int80_bounce(%rbx),%rdx
+@@ -441,9 +515,27 @@ ENTRY(dom_crash_sync_extable)
+
+ ENTRY(common_interrupt)
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .Lintr_cr3_okay
++ jns .Lintr_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.Lintr_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ xor %ecx, %ecx
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ testb $3, UREGS_cs(%rsp)
++ cmovnz %rcx, %r15
++.Lintr_cr3_okay:
++
+ CR4_PV32_RESTORE
+ movq %rsp,%rdi
+ callq do_IRQ
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ jmp ret_from_intr
+
+ /* No special register assumptions. */
+@@ -461,6 +553,23 @@ ENTRY(page_fault)
+ /* No special register assumptions. */
+ GLOBAL(handle_exception)
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .Lxcpt_cr3_okay
++ jns .Lxcpt_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.Lxcpt_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ xor %ecx, %ecx
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ testb $3, UREGS_cs(%rsp)
++ cmovnz %rcx, %r15
++.Lxcpt_cr3_okay:
++
+ handle_exception_saved:
+ GET_CURRENT(bx)
+ testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
+@@ -525,6 +634,7 @@ handle_exception_saved:
+ leaq exception_table(%rip),%rdx
+ PERFC_INCR(exceptions, %rax, %rbx)
+ callq *(%rdx,%rax,8)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ testb $3,UREGS_cs(%rsp)
+ jz restore_all_xen
+ leaq VCPU_trap_bounce(%rbx),%rdx
+@@ -557,6 +667,7 @@ exception_with_ints_disabled:
+ rep; movsq # make room for ec/ev
+ 1: movq UREGS_error_code(%rsp),%rax # ec/ev
+ movq %rax,UREGS_kernel_sizeof(%rsp)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ jmp restore_all_xen # return to fixup code
+
+ /* No special register assumptions. */
+@@ -634,6 +745,17 @@ ENTRY(double_fault)
+ movl $TRAP_double_fault,4(%rsp)
+ /* Set AC to reduce chance of further SMAP faults */
+ SAVE_ALL STAC
++
++ GET_STACK_END(bx)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rbx
++ test %rbx, %rbx
++ jz .Ldblf_cr3_okay
++ jns .Ldblf_cr3_load
++ neg %rbx
++.Ldblf_cr3_load:
++ write_cr3 rbx, rdi, rsi
++.Ldblf_cr3_okay:
++
+ movq %rsp,%rdi
+ call do_double_fault
+ BUG /* do_double_fault() shouldn't return. */
+@@ -652,10 +774,28 @@ ENTRY(nmi)
+ movl $TRAP_nmi,4(%rsp)
+ handle_ist_exception:
+ SAVE_ALL CLAC
++
++ GET_STACK_END(14)
++ mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
++ mov %rcx, %r15
++ neg %rcx
++ jz .List_cr3_okay
++ jns .List_cr3_load
++ mov %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++ neg %rcx
++.List_cr3_load:
++ write_cr3 rcx, rdi, rsi
++ movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
++.List_cr3_okay:
++
+ CR4_PV32_RESTORE
+ testb $3,UREGS_cs(%rsp)
+ jz 1f
+- /* Interrupted guest context. Copy the context to stack bottom. */
++ /*
++ * Interrupted guest context. Clear the restore value for xen_cr3
++ * and copy the context to stack bottom.
++ */
++ xor %r15, %r15
+ GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
+ movq %rsp,%rsi
+ movl $UREGS_kernel_sizeof/8,%ecx
+@@ -665,6 +805,7 @@ handle_ist_exception:
+ movzbl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
++ mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+ cmpb $TRAP_nmi,UREGS_entry_vector(%rsp)
+ jne ret_from_intr
+
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index 6e5c079ad8..6cfdaa1aa0 100644
+--- a/xen/include/asm-x86/asm_defns.h
++++ b/xen/include/asm-x86/asm_defns.h
+@@ -93,9 +93,30 @@ void ret_from_intr(void);
+ UNLIKELY_DONE(mp, tag); \
+ __UNLIKELY_END(tag)
+
++ .equ .Lrax, 0
++ .equ .Lrcx, 1
++ .equ .Lrdx, 2
++ .equ .Lrbx, 3
++ .equ .Lrsp, 4
++ .equ .Lrbp, 5
++ .equ .Lrsi, 6
++ .equ .Lrdi, 7
++ .equ .Lr8, 8
++ .equ .Lr9, 9
++ .equ .Lr10, 10
++ .equ .Lr11, 11
++ .equ .Lr12, 12
++ .equ .Lr13, 13
++ .equ .Lr14, 14
++ .equ .Lr15, 15
++
+ #define STACK_CPUINFO_FIELD(field) (1 - CPUINFO_sizeof + CPUINFO_##field)
+ #define GET_STACK_END(reg) \
++ .if .Lr##reg > 8; \
++ movq $STACK_SIZE-1, %r##reg; \
++ .else; \
+ movl $STACK_SIZE-1, %e##reg; \
++ .endif; \
+ orq %rsp, %r##reg
+
+ #define GET_CPUINFO_FIELD(field, reg) \
+@@ -177,6 +198,15 @@ void ret_from_intr(void);
+ #define ASM_STAC ASM_AC(STAC)
+ #define ASM_CLAC ASM_AC(CLAC)
+
++.macro write_cr3 val:req, tmp1:req, tmp2:req
++ mov %cr4, %\tmp1
++ mov %\tmp1, %\tmp2
++ and $~X86_CR4_PGE, %\tmp1
++ mov %\tmp1, %cr4
++ mov %\val, %cr3
++ mov %\tmp2, %cr4
++.endm
++
+ #define CR4_PV32_RESTORE \
+ 667: ASM_NOP5; \
+ .pushsection .altinstr_replacement, "ax"; \
+diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
+index e6587e684c..397fa4c38f 100644
+--- a/xen/include/asm-x86/current.h
++++ b/xen/include/asm-x86/current.h
+@@ -42,6 +42,18 @@ struct cpu_info {
+ struct vcpu *current_vcpu;
+ unsigned long per_cpu_offset;
+ unsigned long cr4;
++ /*
++ * Of the two following fields the latter is being set to the CR3 value
++ * to be used on the given pCPU for loading whenever 64-bit PV guest
++ * context is being entered. The value never changes once set.
++ * The former is the value to restore when re-entering Xen, if any. IOW
++ * its value being zero means there's nothing to restore. However, its
++ * value can also be negative, indicating to the exit-to-Xen code that
++ * restoring is not necessary, but allowing any nested entry code paths
++ * to still know the value to put back into CR3.
++ */
++ unsigned long xen_cr3;
++ unsigned long pv_cr3;
+ /* get_stack_bottom() must be 16-byte aligned */
+ };
+
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index ccd406a3fe..9906f38f2d 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -517,6 +517,7 @@ extern idt_entry_t idt_table[];
+ extern idt_entry_t *idt_tables[];
+
+ DECLARE_PER_CPU(struct tss_struct, init_tss);
++DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
+
+ extern void init_int80_direct_trap(struct vcpu *v);
+
+diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
+index 589f22552e..afc77c3237 100644
+--- a/xen/include/asm-x86/x86_64/page.h
++++ b/xen/include/asm-x86/x86_64/page.h
+@@ -25,8 +25,8 @@
+ /* These are architectural limits. Current CPUs support only 40-bit phys. */
+ #define PADDR_BITS 52
+ #define VADDR_BITS 48
+-#define PADDR_MASK ((1UL << PADDR_BITS)-1)
+-#define VADDR_MASK ((1UL << VADDR_BITS)-1)
++#define PADDR_MASK ((_AC(1,UL) << PADDR_BITS) - 1)
++#define VADDR_MASK ((_AC(1,UL) << VADDR_BITS) - 1)
+
+ #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63))
+
+@@ -117,6 +117,7 @@ typedef l4_pgentry_t root_pgentry_t;
+ : (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
+ ((_s) > ROOT_PAGETABLE_LAST_XEN_SLOT)))
+
++#define root_table_offset l4_table_offset
+ #define root_get_pfn l4e_get_pfn
+ #define root_get_flags l4e_get_flags
+ #define root_get_intpte l4e_get_intpte
+--
+2.15.1
+
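As an aside (illustrative only, not part of the committed patch files): the struct cpu_info comment near the end of the patch above defines a small encoding for xen_cr3. Below is a minimal C sketch of that convention, using hypothetical helper names; the real logic lives in the entry.S assembly.

#include <stdio.h>

/* Hypothetical helpers mirroring the xen_cr3 states described in the comment. */
static int cr3_nothing_to_do(long xen_cr3) { return xen_cr3 == 0; } /* already on Xen's tables */
static int cr3_needs_restore(long xen_cr3) { return xen_cr3 > 0;  } /* load this CR3 on re-entry */
static unsigned long cr3_remembered(long xen_cr3)
{
    /* Negative: an outer entry path already switched CR3, but nested
     * entries (NMI/#MC) still need to know which value to put back. */
    return xen_cr3 < 0 ? (unsigned long)-xen_cr3 : (unsigned long)xen_cr3;
}

int main(void)
{
    long samples[] = { 0, 0x1234000L, -0x1234000L };

    for ( unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++ )
        printf("xen_cr3=%ld idle=%d restore=%d value=%#lx\n", samples[i],
               cr3_nothing_to_do(samples[i]), cr3_needs_restore(samples[i]),
               cr3_remembered(samples[i]));

    return 0;
}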
diff --git a/emulators/xen-kernel/files/0001-x86-compat-fix-compilation-errors-with-clang-6.patch b/emulators/xen-kernel/files/0001-x86-compat-fix-compilation-errors-with-clang-6.patch
new file mode 100644
index 000000000000..a75cf8b29281
--- /dev/null
+++ b/emulators/xen-kernel/files/0001-x86-compat-fix-compilation-errors-with-clang-6.patch
@@ -0,0 +1,76 @@
+From 58e028648e3bc831b1b60a39b7f1661538fa6a34 Mon Sep 17 00:00:00 2001
+From: Roger Pau Monne <roger.pau@citrix.com>
+Date: Tue, 23 Jan 2018 16:05:17 +0000
+Subject: [PATCH] x86/compat: fix compilation errors with clang 6
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The following errors are generated when compiling Xen with clang 6:
+
+In file included from x86_64/asm-offsets.c:9:
+In file included from /root/src/xen/xen/include/xen/sched.h:8:
+In file included from /root/src/xen/xen/include/xen/shared.h:6:
+In file included from /root/src/xen/xen/include/compat/arch-x86/../xen.h:9:
+/root/src/xen/xen/include/compat/arch-x86/xen.h:10:10: error: the current #pragma pack aligment
+ value is modified in the included file [-Werror,-Wpragma-pack]
+#include "xen-x86_32.h"
+ ^
+/root/src/xen/xen/include/compat/arch-x86/xen-x86_32.h:40:9: note: previous '#pragma pack'
+ directive that modifies alignment is here
+#pragma pack()
+ ^
+In file included from x86_64/asm-offsets.c:9:
+In file included from /root/src/xen/xen/include/xen/sched.h:8:
+In file included from /root/src/xen/xen/include/xen/shared.h:6:
+/root/src/xen/xen/include/compat/arch-x86/../xen.h:9:10: error: the current #pragma pack aligment
+ value is modified in the included file [-Werror,-Wpragma-pack]
+#include "arch-x86/xen.h"
+ ^
+/root/src/xen/xen/include/compat/arch-x86/xen.h:71:9: note: previous '#pragma pack' directive that
+ modifies alignment is here
+#pragma pack()
+ ^
+2 errors generated.
+
+Fix this by using pragma push/pop in order to store the current pragma
+value in the compiler stack and later restoring it when using clang.
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+---
+Cc: Andrew Cooper <andrew.cooper3@citrix.com>
+Cc: George Dunlap <George.Dunlap@eu.citrix.com>
+Cc: Ian Jackson <ian.jackson@eu.citrix.com>
+Cc: Jan Beulich <jbeulich@suse.com>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Cc: Stefano Stabellini <sstabellini@kernel.org>
+Cc: Tim Deegan <tim@xen.org>
+Cc: Wei Liu <wei.liu2@citrix.com>
+---
+Changes since v1:
+ - Only use push/pop with clang.
+---
+ xen/include/Makefile | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/xen/include/Makefile b/xen/include/Makefile
+index 268bc9d6ba..eeae942903 100644
+--- a/xen/include/Makefile
++++ b/xen/include/Makefile
+@@ -34,8 +34,13 @@ cppflags-y := -include public/xen-compat.h
+ cppflags-$(CONFIG_X86) += -m32
+
+ # 8-byte types are 4-byte aligned on x86_32 ...
++ifeq ($(clang),y)
++prefix-$(CONFIG_X86) := \#pragma pack(push, 4)
++suffix-$(CONFIG_X86) := \#pragma pack(pop)
++else
+ prefix-$(CONFIG_X86) := \#pragma pack(4)
+ suffix-$(CONFIG_X86) := \#pragma pack()
++endif
+
+ endif
+
+--
+2.15.1
+
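As an aside (illustrative only, not part of the committed patch files): the clang diagnostic quoted above fires because a plain #pragma pack(4) / #pragma pack() pair in a nested header resets whatever packing the including header had established. A minimal sketch of the push/pop form the patch emits for clang builds, with hypothetical header names:

/* inner.h (hypothetical) */
#pragma pack(push, 4)            /* save the current packing, then force 4-byte alignment */
struct inner { long long v; };
#pragma pack(pop)                /* restore the includer's packing exactly */

/* outer.h (hypothetical) */
#pragma pack(push, 4)
#include "inner.h"               /* no longer clobbers outer.h's packing state */
struct outer { long long w; };
#pragma pack(pop)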
diff --git a/emulators/xen-kernel/files/0002-p2m-Check-return-value-of-p2m_set_entry-when-decreas.patch b/emulators/xen-kernel/files/0002-p2m-Check-return-value-of-p2m_set_entry-when-decreas.patch
new file mode 100644
index 000000000000..e72d7511b3be
--- /dev/null
+++ b/emulators/xen-kernel/files/0002-p2m-Check-return-value-of-p2m_set_entry-when-decreas.patch
@@ -0,0 +1,109 @@
+From 01feeda5363dd8d2fea8395c2c435203751c8ba5 Mon Sep 17 00:00:00 2001
+From: George Dunlap <george.dunlap@citrix.com>
+Date: Fri, 10 Nov 2017 16:53:55 +0000
+Subject: [PATCH 2/2] p2m: Check return value of p2m_set_entry() when
+ decreasing reservation
+
+If the entire range specified to p2m_pod_decrease_reservation() is marked
+populate-on-demand, then it will make a single p2m_set_entry() call,
+reducing its PoD entry count.
+
+Unfortunately, in the right circumstances, this p2m_set_entry() call
+may fail. It that case, repeated calls to decrease_reservation() may
+cause p2m->pod.entry_count to fall below zero, potentially tripping
+over BUG_ON()s to the contrary.
+
+Instead, check to see if the entry succeeded, and return false if not.
+The caller will then call guest_remove_page() on the gfns, which will
+return -EINVAL upon finding no valid memory there to return.
+
+Unfortunately if the order > 0, the entry may have partially changed.
+A domain_crash() is probably the safest thing in that case.
+
+Other p2m_set_entry() calls in the same function should be fine,
+because they are writing the entry at its current order. Nonetheless,
+check the return value and crash if our assumption turns otu to be
+wrong.
+
+This is part of XSA-247.
+
+Reported-by: George Dunlap <george.dunlap@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+v2: Crash the domain if we're not sure it's safe (or if we think it
+can't happen)
+---
+ xen/arch/x86/mm/p2m-pod.c | 42 +++++++++++++++++++++++++++++++++---------
+ 1 file changed, 33 insertions(+), 9 deletions(-)
+
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index 5ec8a37949..91d309647e 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -557,11 +557,23 @@ p2m_pod_decrease_reservation(struct domain *d,
+
+ if ( !nonpod )
+ {
+- /* All PoD: Mark the whole region invalid and tell caller
+- * we're done. */
+- p2m_set_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid,
+- p2m->default_access);
+- p2m->pod.entry_count-=(1<<order);
++ /*
++ * All PoD: Mark the whole region invalid and tell caller
++ * we're done.
++ */
++ if ( p2m_set_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid,
++ p2m->default_access) )
++ {
++ /*
++ * If this fails, we can't tell how much of the range was changed.
++ * Best to crash the domain unless we're sure a partial change is
++ * impossible.
++ */
++ if ( order != 0 )
++ domain_crash(d);
++ goto out_unlock;
++ }
++ p2m->pod.entry_count -= 1UL << order;
+ BUG_ON(p2m->pod.entry_count < 0);
+ ret = 1;
+ goto out_entry_check;
+@@ -602,8 +614,14 @@ p2m_pod_decrease_reservation(struct domain *d,
+ n = 1UL << cur_order;
+ if ( t == p2m_populate_on_demand )
+ {
+- p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+- p2m_invalid, p2m->default_access);
++ /* This shouldn't be able to fail */
++ if ( p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
++ p2m_invalid, p2m->default_access) )
++ {
++ ASSERT_UNREACHABLE();
++ domain_crash(d);
++ goto out_unlock;
++ }
+ p2m->pod.entry_count -= n;
+ BUG_ON(p2m->pod.entry_count < 0);
+ pod -= n;
+@@ -624,8 +642,14 @@ p2m_pod_decrease_reservation(struct domain *d,
+
+ page = mfn_to_page(mfn);
+
+- p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
+- p2m_invalid, p2m->default_access);
++ /* This shouldn't be able to fail */
++ if ( p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), cur_order,
++ p2m_invalid, p2m->default_access) )
++ {
++ ASSERT_UNREACHABLE();
++ domain_crash(d);
++ goto out_unlock;
++ }
+ p2m_tlb_flush_sync(p2m);
+ for ( j = 0; j < n; ++j )
+ set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
+--
+2.15.0
+
diff --git a/emulators/xen-kernel/files/0002-x86-allow-Meltdown-band-aid-to-be-disabled.patch b/emulators/xen-kernel/files/0002-x86-allow-Meltdown-band-aid-to-be-disabled.patch
new file mode 100644
index 000000000000..20894e12cc19
--- /dev/null
+++ b/emulators/xen-kernel/files/0002-x86-allow-Meltdown-band-aid-to-be-disabled.patch
@@ -0,0 +1,163 @@
+From e19d0af4ee2ae9e42a85db639fd6848e72f5658b Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich@suse.com>
+Date: Wed, 17 Jan 2018 17:24:59 +0100
+Subject: [PATCH 2/2] x86: allow Meltdown band-aid to be disabled
+
+First of all we don't need it on AMD systems. Additionally allow its use
+to be controlled by command line option. For best backportability, this
+intentionally doesn't use alternative instruction patching to achieve
+the intended effect - while we likely want it, this will be later
+follow-up.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+master commit: e871e80c38547d9faefc6604532ba3e985e65873
+master date: 2018-01-16 17:50:59 +0100
+---
+ docs/misc/xen-command-line.markdown | 12 ++++++++++++
+ xen/arch/x86/domain.c | 7 +++++--
+ xen/arch/x86/mm.c | 2 +-
+ xen/arch/x86/smpboot.c | 17 ++++++++++++++---
+ xen/arch/x86/x86_64/entry.S | 2 ++
+ 5 files changed, 34 insertions(+), 6 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 2dacb5d073..aecf9fd49d 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -1621,6 +1621,18 @@ In the case that x2apic is in use, this option switches between physical and
+ clustered mode. The default, given no hint from the **FADT**, is cluster
+ mode.
+
++### xpti
++> `= <boolean>`
++
++> Default: `false` on AMD hardware
++> Default: `true` everywhere else
++
++Override default selection of whether to isolate 64-bit PV guest page
++tables.
++
++** WARNING: Not yet a complete isolation implementation, but better than
++nothing. **
++
+ ### xsave
+ > `= <boolean>`
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 3cf18f95b7..a1bda5e12d 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -1945,12 +1945,15 @@ static void paravirt_ctxt_switch_from(struct vcpu *v)
+
+ static void paravirt_ctxt_switch_to(struct vcpu *v)
+ {
++ root_pgentry_t *root_pgt = this_cpu(root_pgt);
+ unsigned long cr4;
+
+ switch_kernel_stack(v);
+
+- this_cpu(root_pgt)[root_table_offset(PERDOMAIN_VIRT_START)] =
+- l4e_from_page(v->domain->arch.perdomain_l3_pg, __PAGE_HYPERVISOR_RW);
++ if ( root_pgt )
++ root_pgt[root_table_offset(PERDOMAIN_VIRT_START)] =
++ l4e_from_page(v->domain->arch.perdomain_l3_pg,
++ __PAGE_HYPERVISOR_RW);
+
+ cr4 = pv_guest_cr4_to_real_cr4(v);
+ if ( unlikely(cr4 != read_cr4()) )
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index c9e4003989..07015e3160 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4007,7 +4007,7 @@ long do_mmu_update(
+ rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
+ if ( !rc )
+- sync_guest = 1;
++ sync_guest = !!this_cpu(root_pgt);
+ break;
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index eaeec5acf0..f2f47f612a 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -320,7 +320,7 @@ void start_secondary(void *unused)
+ spin_debug_disable();
+
+ get_cpu_info()->xen_cr3 = 0;
+- get_cpu_info()->pv_cr3 = __pa(this_cpu(root_pgt));
++ get_cpu_info()->pv_cr3 = this_cpu(root_pgt) ? __pa(this_cpu(root_pgt)) : 0;
+
+ load_system_tables();
+
+@@ -729,14 +729,20 @@ static int clone_mapping(const void *ptr, root_pgentry_t *rpt)
+ return 0;
+ }
+
++static __read_mostly int8_t opt_xpti = -1;
++boolean_param("xpti", opt_xpti);
+ DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
+
+ static int setup_cpu_root_pgt(unsigned int cpu)
+ {
+- root_pgentry_t *rpt = alloc_xen_pagetable();
++ root_pgentry_t *rpt;
+ unsigned int off;
+ int rc;
+
++ if ( !opt_xpti )
++ return 0;
++
++ rpt = alloc_xen_pagetable();
+ if ( !rpt )
+ return -ENOMEM;
+
+@@ -977,10 +983,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
+
+ stack_base[0] = stack_start;
+
++ if ( opt_xpti < 0 )
++ opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
++
+ rc = setup_cpu_root_pgt(0);
+ if ( rc )
+ panic("Error %d setting up PV root page table\n", rc);
+- get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
++ if ( per_cpu(root_pgt, 0) )
++ get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
+
+ set_nr_sockets();
+
+@@ -1048,6 +1058,7 @@ void __init smp_prepare_boot_cpu(void)
+ cpumask_set_cpu(smp_processor_id(), &cpu_present_map);
+
+ get_cpu_info()->xen_cr3 = 0;
++ get_cpu_info()->pv_cr3 = 0;
+ }
+
+ static void
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index d63e734bb3..2a569952e3 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -45,6 +45,7 @@ restore_all_guest:
+ movabs $DIRECTMAP_VIRT_START, %rcx
+ mov %rdi, %rax
+ and %rsi, %rdi
++ jz .Lrag_keep_cr3
+ and %r9, %rsi
+ add %rcx, %rdi
+ add %rcx, %rsi
+@@ -61,6 +62,7 @@ restore_all_guest:
+ rep movsq
+ mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
+ write_cr3 rax, rdi, rsi
++.Lrag_keep_cr3:
+
+ RESTORE_ALL
+ testw $TRAP_syscall,4(%rsp)
+--
+2.15.1
+
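A usage note (the non-xpti settings below are placeholders, not values taken from this commit): the new xpti knob documented in the patch above is a Xen boot option, so on a FreeBSD dom0 it would normally be appended to the hypervisor command line in /boot/loader.conf, for example:

xen_kernel="/boot/xen"
xen_cmdline="dom0_mem=2048M dom0_max_vcpus=2 console=com1,vga xpti=false"

Here xpti=false turns the band-aid off (it already defaults to off on AMD hardware); omit the option or use xpti=true to keep the isolation enabled.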
diff --git a/emulators/xen-kernel/files/xsa246-4.7.patch b/emulators/xen-kernel/files/xsa246-4.7.patch
new file mode 100644
index 000000000000..bb58d6e7c840
--- /dev/null
+++ b/emulators/xen-kernel/files/xsa246-4.7.patch
@@ -0,0 +1,74 @@
+From: Julien Grall <julien.grall@linaro.org>
+Subject: x86/pod: prevent infinite loop when shattering large pages
+
+When populating pages, the PoD may need to split large ones using
+p2m_set_entry and request the caller to retry (see ept_get_entry for
+instance).
+
+p2m_set_entry may fail to shatter if it is not possible to allocate
+memory for the new page table. However, the error is not propagated
+resulting in the callers retrying the PoD infinitely.
+
+Prevent the infinite loop by returning false when it is not possible to
+shatter the large mapping.
+
+This is XSA-246.
+
+Signed-off-by: Julien Grall <julien.grall@linaro.org>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -1073,9 +1073,8 @@ p2m_pod_demand_populate(struct p2m_domai
+ * NOTE: In a fine-grained p2m locking scenario this operation
+ * may need to promote its locking from gfn->1g superpage
+ */
+- p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_2M,
+- p2m_populate_on_demand, p2m->default_access);
+- return 0;
++ return p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_2M,
++ p2m_populate_on_demand, p2m->default_access);
+ }
+
+ /* Only reclaim if we're in actual need of more cache. */
+@@ -1106,8 +1105,12 @@ p2m_pod_demand_populate(struct p2m_domai
+
+ gfn_aligned = (gfn >> order) << order;
+
+- p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw,
+- p2m->default_access);
++ if ( p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw,
++ p2m->default_access) )
++ {
++ p2m_pod_cache_add(p2m, p, order);
++ goto out_fail;
++ }
+
+ for( i = 0; i < (1UL << order); i++ )
+ {
+@@ -1152,13 +1155,18 @@ remap_and_retry:
+ BUG_ON(order != PAGE_ORDER_2M);
+ pod_unlock(p2m);
+
+- /* Remap this 2-meg region in singleton chunks */
+- /* NOTE: In a p2m fine-grained lock scenario this might
+- * need promoting the gfn lock from gfn->2M superpage */
++ /*
++ * Remap this 2-meg region in singleton chunks. See the comment on the
++ * 1G page splitting path above for why a single call suffices.
++ *
++ * NOTE: In a p2m fine-grained lock scenario this might
++ * need promoting the gfn lock from gfn->2M superpage.
++ */
+ gfn_aligned = (gfn>>order)<<order;
+- for(i=0; i<(1<<order); i++)
+- p2m_set_entry(p2m, gfn_aligned + i, _mfn(INVALID_MFN), PAGE_ORDER_4K,
+- p2m_populate_on_demand, p2m->default_access);
++ if ( p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_4K,
++ p2m_populate_on_demand, p2m->default_access) )
++ return -1;
++
+ if ( tb_init_done )
+ {
+ struct {
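As an aside (illustrative only, not part of the committed patch files): a stripped-down sketch of the failure mode XSA-246 closes, with hypothetical stand-ins for the p2m helpers. If the call that shatters a large page can fail but its return value is discarded, the populate-on-demand path retries forever; propagating the error lets the loop terminate.

#include <stdio.h>

static int try_shatter(void)  { return -1; /* e.g. out of p2m pool memory   */ }
static int try_populate(void) { return -1; /* cannot succeed until shattered */ }

static int populate(void)
{
    for ( ;; )
    {
        if ( try_populate() == 0 )
            return 0;
        if ( try_shatter() != 0 )    /* previously ignored: infinite loop */
            return -1;
    }
}

int main(void)
{
    printf("populate() = %d\n", populate());    /* terminates with -1 */
    return 0;
}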
diff --git a/emulators/xen-kernel/files/xsa248-4.8.patch b/emulators/xen-kernel/files/xsa248-4.8.patch
new file mode 100644
index 000000000000..d15297e78dff
--- /dev/null
+++ b/emulators/xen-kernel/files/xsa248-4.8.patch
@@ -0,0 +1,162 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/mm: don't wrongly set page ownership
+
+PV domains can obtain mappings of any pages owned by the correct domain,
+including ones that aren't actually assigned as "normal" RAM, but used
+by Xen internally. At the moment such "internal" pages marked as owned
+by a guest include pages used to track logdirty bits, as well as p2m
+pages and the "unpaged pagetable" for HVM guests. Since the PV memory
+management and shadow code conflict in their use of struct page_info
+fields, and since shadow code is being used for log-dirty handling for
+PV domains, pages coming from the shadow pool must, for PV domains, not
+have the domain set as their owner.
+
+While the change could be done conditionally for just the PV case in
+shadow code, do it unconditionally (and for consistency also for HAP),
+just to be on the safe side.
+
+There's one special case though for shadow code: The page table used for
+running a HVM guest in unpaged mode is subject to get_page() (in
+set_shadow_status()) and hence must have its owner set.
+
+This is XSA-248.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -283,8 +283,7 @@ static struct page_info *hap_alloc_p2m_p
+ {
+ d->arch.paging.hap.total_pages--;
+ d->arch.paging.hap.p2m_pages++;
+- page_set_owner(pg, d);
+- pg->count_info |= 1;
++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
+ }
+ else if ( !d->arch.paging.p2m_alloc_failed )
+ {
+@@ -299,21 +298,23 @@ static struct page_info *hap_alloc_p2m_p
+
+ static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
+ {
++ struct domain *owner = page_get_owner(pg);
++
+ /* This is called both from the p2m code (which never holds the
+ * paging lock) and the log-dirty code (which always does). */
+ paging_lock_recursive(d);
+
+- ASSERT(page_get_owner(pg) == d);
+- /* Should have just the one ref we gave it in alloc_p2m_page() */
+- if ( (pg->count_info & PGC_count_mask) != 1 ) {
+- HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
+- pg, pg->count_info, pg->u.inuse.type_info);
++ /* Should still have no owner and count zero. */
++ if ( owner || (pg->count_info & PGC_count_mask) )
++ {
++ HAP_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n",
++ d->domain_id, mfn_x(page_to_mfn(pg)),
++ owner ? owner->domain_id : DOMID_INVALID,
++ pg->count_info, pg->u.inuse.type_info);
+ WARN();
++ pg->count_info &= ~PGC_count_mask;
++ page_set_owner(pg, NULL);
+ }
+- pg->count_info &= ~PGC_count_mask;
+- /* Free should not decrement domain's total allocation, since
+- * these pages were allocated without an owner. */
+- page_set_owner(pg, NULL);
+ d->arch.paging.hap.p2m_pages--;
+ d->arch.paging.hap.total_pages++;
+ hap_free(d, page_to_mfn(pg));
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -1573,32 +1573,29 @@ shadow_alloc_p2m_page(struct domain *d)
+ pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
+ d->arch.paging.shadow.p2m_pages++;
+ d->arch.paging.shadow.total_pages--;
++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
+
+ paging_unlock(d);
+
+- /* Unlike shadow pages, mark p2m pages as owned by the domain.
+- * Marking the domain as the owner would normally allow the guest to
+- * create mappings of these pages, but these p2m pages will never be
+- * in the domain's guest-physical address space, and so that is not
+- * believed to be a concern. */
+- page_set_owner(pg, d);
+- pg->count_info |= 1;
+ return pg;
+ }
+
+ static void
+ shadow_free_p2m_page(struct domain *d, struct page_info *pg)
+ {
+- ASSERT(page_get_owner(pg) == d);
+- /* Should have just the one ref we gave it in alloc_p2m_page() */
+- if ( (pg->count_info & PGC_count_mask) != 1 )
++ struct domain *owner = page_get_owner(pg);
++
++ /* Should still have no owner and count zero. */
++ if ( owner || (pg->count_info & PGC_count_mask) )
+ {
+- SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
++ SHADOW_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n",
++ d->domain_id, mfn_x(page_to_mfn(pg)),
++ owner ? owner->domain_id : DOMID_INVALID,
+ pg->count_info, pg->u.inuse.type_info);
++ pg->count_info &= ~PGC_count_mask;
++ page_set_owner(pg, NULL);
+ }
+- pg->count_info &= ~PGC_count_mask;
+ pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */
+- page_set_owner(pg, NULL);
+
+ /* This is called both from the p2m code (which never holds the
+ * paging lock) and the log-dirty code (which always does). */
+@@ -3216,7 +3213,9 @@ int shadow_enable(struct domain *d, u32
+ | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER
+ | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
+ unmap_domain_page(e);
++ pg->count_info = 1;
+ pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated;
++ page_set_owner(pg, d);
+ }
+
+ paging_lock(d);
+@@ -3254,7 +3253,11 @@ int shadow_enable(struct domain *d, u32
+ if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) )
+ p2m_teardown(p2m);
+ if ( rv != 0 && pg != NULL )
++ {
++ pg->count_info &= ~PGC_count_mask;
++ page_set_owner(pg, NULL);
+ shadow_free_p2m_page(d, pg);
++ }
+ domain_unpause(d);
+ return rv;
+ }
+@@ -3363,7 +3366,22 @@ out:
+
+ /* Must be called outside the lock */
+ if ( unpaged_pagetable )
++ {
++ if ( page_get_owner(unpaged_pagetable) == d &&
++ (unpaged_pagetable->count_info & PGC_count_mask) == 1 )
++ {
++ unpaged_pagetable->count_info &= ~PGC_count_mask;
++ page_set_owner(unpaged_pagetable, NULL);
++ }
++ /* Complain here in cases where shadow_free_p2m_page() won't. */
++ else if ( !page_get_owner(unpaged_pagetable) &&
++ !(unpaged_pagetable->count_info & PGC_count_mask) )
++ SHADOW_ERROR("d%d: Odd unpaged pt %"PRI_mfn" c=%lx t=%"PRtype_info"\n",
++ d->domain_id, mfn_x(page_to_mfn(unpaged_pagetable)),
++ unpaged_pagetable->count_info,
++ unpaged_pagetable->u.inuse.type_info);
+ shadow_free_p2m_page(d, unpaged_pagetable);
++ }
+ }
+
+ void shadow_final_teardown(struct domain *d)
diff --git a/emulators/xen-kernel/files/xsa249.patch b/emulators/xen-kernel/files/xsa249.patch
new file mode 100644
index 000000000000..ecfa4305e5bf
--- /dev/null
+++ b/emulators/xen-kernel/files/xsa249.patch
@@ -0,0 +1,42 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: fix refcount overflow check
+
+Commit c385d27079 ("x86 shadow: for multi-page shadows, explicitly track
+the first page") reduced the refcount width to 25, without adjusting the
+overflow check. Eliminate the disconnect by using a manifest constant.
+
+Interestingly, up to commit 047782fa01 ("Out-of-sync L1 shadows: OOS
+snapshot") the refcount was 27 bits wide, yet the check was already
+using 26.
+
+This is XSA-249.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+---
+v2: Simplify expression back to the style it was.
+
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -529,7 +529,7 @@ static inline int sh_get_ref(struct doma
+ x = sp->u.sh.count;
+ nx = x + 1;
+
+- if ( unlikely(nx >= 1U<<26) )
++ if ( unlikely(nx >= (1U << PAGE_SH_REFCOUNT_WIDTH)) )
+ {
+ SHADOW_PRINTK("shadow ref overflow, gmfn=%lx smfn=%lx\n",
+ __backpointer(sp), mfn_x(smfn));
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -82,7 +82,8 @@ struct page_info
+ unsigned long type:5; /* What kind of shadow is this? */
+ unsigned long pinned:1; /* Is the shadow pinned? */
+ unsigned long head:1; /* Is this the first page of the shadow? */
+- unsigned long count:25; /* Reference count */
++#define PAGE_SH_REFCOUNT_WIDTH 25
++ unsigned long count:PAGE_SH_REFCOUNT_WIDTH; /* Reference count */
+ } sh;
+
+ /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
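As an aside (illustrative only, not part of the committed patch files): the bug fixed here is a drift between a bit-field's declared width and its overflow check. A minimal standalone sketch of the pattern the patch enforces, with hypothetical names: derive the check from the same manifest constant as the field width so the two cannot disagree again.

#include <stdio.h>

#define REFCOUNT_WIDTH 25                        /* single source of truth */

struct shadow_page {
    unsigned long count : REFCOUNT_WIDTH;        /* reference count */
};

/* Returns 0 on success, -1 if another reference would overflow the field. */
static int get_ref(struct shadow_page *sp)
{
    unsigned long nx = sp->count + 1;

    if ( nx >= (1UL << REFCOUNT_WIDTH) )         /* check matches the width */
        return -1;

    sp->count = nx;
    return 0;
}

int main(void)
{
    struct shadow_page sp = { .count = (1UL << REFCOUNT_WIDTH) - 1 };

    printf("get_ref() = %d\n", get_ref(&sp));    /* -1: refused, no wraparound */
    return 0;
}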
diff --git a/emulators/xen-kernel/files/xsa250.patch b/emulators/xen-kernel/files/xsa250.patch
new file mode 100644
index 000000000000..26aeb33fedaf
--- /dev/null
+++ b/emulators/xen-kernel/files/xsa250.patch
@@ -0,0 +1,67 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/shadow: fix ref-counting error handling
+
+The old-Linux handling in shadow_set_l4e() mistakenly ORed together the
+results of sh_get_ref() and sh_pin(). As the latter failing is not a
+correctness problem, simply ignore its return value.
+
+In sh_set_toplevel_shadow() a failing sh_get_ref() must not be
+accompanied by installing the entry, despite the domain being crashed.
+
+This is XSA-250.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -923,7 +923,7 @@ static int shadow_set_l4e(struct domain
+ shadow_l4e_t new_sl4e,
+ mfn_t sl4mfn)
+ {
+- int flags = 0, ok;
++ int flags = 0;
+ shadow_l4e_t old_sl4e;
+ paddr_t paddr;
+ ASSERT(sl4e != NULL);
+@@ -938,15 +938,16 @@ static int shadow_set_l4e(struct domain
+ {
+ /* About to install a new reference */
+ mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);
+- ok = sh_get_ref(d, sl3mfn, paddr);
+- /* Are we pinning l3 shadows to handle wierd linux behaviour? */
+- if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) )
+- ok |= sh_pin(d, sl3mfn);
+- if ( !ok )
++
++ if ( !sh_get_ref(d, sl3mfn, paddr) )
+ {
+ domain_crash(d);
+ return SHADOW_SET_ERROR;
+ }
++
++ /* Are we pinning l3 shadows to handle weird Linux behaviour? */
++ if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) )
++ sh_pin(d, sl3mfn);
+ }
+
+ /* Write the new entry */
+@@ -3965,14 +3966,15 @@ sh_set_toplevel_shadow(struct vcpu *v,
+
+ /* Take a ref to this page: it will be released in sh_detach_old_tables()
+ * or the next call to set_toplevel_shadow() */
+- if ( !sh_get_ref(d, smfn, 0) )
++ if ( sh_get_ref(d, smfn, 0) )
++ new_entry = pagetable_from_mfn(smfn);
++ else
+ {
+ SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn));
+ domain_crash(d);
++ new_entry = pagetable_null();
+ }
+
+- new_entry = pagetable_from_mfn(smfn);
+-
+ install_new_entry:
+ /* Done. Install it */
+ SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n",
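As an aside (illustrative only, not part of the committed patch files): the shadow_set_l4e() half of XSA-250 is worth spelling out, since the bug is easy to reintroduce. OR-ing the result of a call that must succeed with the result of one that may harmlessly fail can hide the mandatory failure; the sketch below uses hypothetical helpers.

#include <stdio.h>

static int must_succeed(void) { return 0; /* a failure that has to be handled */ }
static int may_fail(void)     { return 1; /* a failure here would be harmless */ }

int main(void)
{
    /* Buggy pattern: ok ends up non-zero, so the real failure goes unnoticed. */
    int ok = must_succeed();
    ok |= may_fail();
    printf("buggy: failure %s\n", ok ? "missed" : "caught");

    /* Fixed pattern: test the mandatory call on its own; ignore the other. */
    if ( !must_succeed() )
        printf("fixed: failure caught\n");
    else
        may_fail();

    return 0;
}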
diff --git a/emulators/xen-kernel/files/xsa251-4.8.patch b/emulators/xen-kernel/files/xsa251-4.8.patch
new file mode 100644
index 000000000000..fffe54d0e10a
--- /dev/null
+++ b/emulators/xen-kernel/files/xsa251-4.8.patch
@@ -0,0 +1,21 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86/paging: don't unconditionally BUG() on finding SHARED_M2P_ENTRY
+
+PV guests can fully control the values written into the P2M.
+
+This is XSA-251.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -276,7 +276,7 @@ void paging_mark_pfn_dirty(struct domain
+ return;
+
+ /* Shared MFNs should NEVER be marked dirty */
+- BUG_ON(SHARED_M2P(pfn));
++ BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn));
+
+ /*
+ * Values with the MSB set denote MFNs that aren't really part of the