From: Samuel Thibault
Subject: [gnumach] 01/02: New upstream version 1.7+git20160921
Date: Wed, 21 Sep 2016 00:01:23 +0000
This is an automated email from the git hooks/post-receive script.
sthibault pushed a commit to branch master
in repository gnumach.
commit bc2d3a9079a479120fd19433ea373ec44f83d10f
Author: Samuel Thibault <address@hidden>
Date: Tue Sep 20 22:58:24 2016 +0000
New upstream version 1.7+git20160921
---
ChangeLog | 520 +++++++++++++++
Makefile.am | 4 +-
Makefile.in | 30 +-
configure | 20 +-
device/ds_routines.c | 4 +-
doc/mach.info | 242 +++----
doc/mach.info-1 | 26 +-
doc/mach.info-2 | 6 +-
doc/mach.texi | 2 +-
doc/stamp-vti | 8 +-
doc/version.texi | 8 +-
i386/Makefrag.am | 1 +
i386/i386/db_trace.c | 19 +-
i386/i386/locore.S | 2 +
i386/i386/model_dep.h | 7 -
i386/i386/phys.c | 22 +-
i386/i386/strings.c | 150 +++++
i386/i386/trap.c | 10 -
i386/i386at/biosmem.c | 586 +++++++++-------
i386/i386at/biosmem.h | 47 +-
i386/i386at/boothdr.S | 4 +-
i386/i386at/interrupt.S | 1 +
i386/i386at/mem.c | 22 +-
i386/i386at/model_dep.c | 93 ++-
i386/include/mach/i386/asm.h | 14 +-
i386/intel/pmap.c | 150 ++---
i386/intel/pmap.h | 12 +-
i386/xen/xen.c | 2 +-
ipc/ipc_init.c | 2 +-
ipc/mach_debug.c | 3 +
kern/lock.c | 3 +
kern/slab.c | 4 +-
kern/startup.c | 1 -
kern/strings.c | 103 +++
kern/task.c | 2 +-
kern/thread.c | 6 +-
kern/thread.h | 5 +-
linux/dev/glue/block.c | 35 +-
linux/dev/glue/net.c | 63 +-
linux/dev/init/main.c | 2 +-
version.m4 | 2 +-
vm/pmap.h | 32 +-
vm/vm_fault.c | 8 +-
vm/vm_kern.c | 8 +-
vm/vm_kern.h | 2 +-
vm/vm_map.c | 124 ++--
vm/vm_map.h | 16 +-
vm/vm_page.c | 1507 +++++++++++++++++++++++++++++++++++++++---
vm/vm_page.h | 184 +++---
vm/vm_pageout.c | 690 +++----------------
vm/vm_pageout.h | 4 +-
vm/vm_resident.c | 385 +++--------
xen/block.c | 51 +-
xen/console.c | 4 +-
xen/net.c | 49 +-
55 files changed, 3388 insertions(+), 1919 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index dce289c..74da3e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,523 @@
+2016-09-21 Richard Braun <address@hidden>
+
+ Enable high memory
+ * i386/i386at/biosmem.c (biosmem_setup): Load the HIGHMEM segment if
+ present.
+ (biosmem_free_usable): Report high memory as usable.
+ * vm/vm_page.c (vm_page_boot_table_size, vm_page_table_size,
+ vm_page_mem_size, vm_page_mem_free): Scan all segments.
+ * vm/vm_resident.c (vm_page_grab): Describe allocation strategy
+ with regard to the HIGHMEM segment.
+
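+ For illustration, the allocation strategy this entry refers to amounts to
+ serving requests from the highest segment the caller can use and falling
+ back to lower ones, so DMA and directly mapped memory are spared. A minimal
+ user-space sketch of that order (segment names and the toy per-segment
+ allocator are assumptions made for this example, not kernel symbols):

    #include <stdio.h>

    /* Illustrative segment order, lowest physical range first. */
    enum { SEG_DMA, SEG_DIRECTMAP, SEG_HIGHMEM, NR_SEGS };

    /* Toy free-page counters standing in for the per-segment buddy allocator. */
    static unsigned int free_pages[NR_SEGS] = { 16, 256, 4096 };

    static int seg_alloc_page(int seg)
    {
        if (free_pages[seg] == 0)
            return 0;       /* segment exhausted */
        free_pages[seg]--;
        return 1;
    }

    /* Try the highest segment the caller can tolerate (HIGHMEM for pageable
       data, DIRECTMAP for kernel-mapped data, DMA for ISA devices) and fall
       back downwards, keeping scarce low memory for callers that need it. */
    static int grab_page(int highest_usable_seg)
    {
        int seg;

        for (seg = highest_usable_seg; seg >= SEG_DMA; seg--)
            if (seg_alloc_page(seg))
                return seg;
        return -1;          /* out of memory in every permitted segment */
    }

    int main(void)
    {
        printf("pageable allocation served from segment %d\n",
               grab_page(SEG_HIGHMEM));
        printf("direct-mapped allocation served from segment %d\n",
               grab_page(SEG_DIRECTMAP));
        return 0;
    }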
+2016-09-21 Richard Braun <address@hidden>
+
+ Update device drivers for highmem support
+ Unconditionally use bounce buffers for now.
+
+ * linux/dev/glue/net.c (device_write): Unconditionally use a
+ bounce buffer.
+ * xen/block.c (device_write): Likewise.
+ * xen/net.c: Include <device/ds_routines.h>.
+ (device_write): Unconditionally use a bounce buffer.
+
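+ The bounce-buffer approach mentioned above copies the payload into memory
+ the driver can address before starting the transfer. A hedged user-space
+ sketch of the pattern (all names are illustrative; the real device_write
+ code works on kernel pages and frees the buffer on I/O completion):

    #include <stdlib.h>
    #include <string.h>

    static int driver_start_io(const void *dma_safe_buf, size_t len);

    /* Copy the payload, which may live in memory the driver cannot reach
       (e.g. HIGHMEM), into a reachable buffer before starting the I/O. */
    int device_write_bounced(const void *payload, size_t len)
    {
        void *bounce = malloc(len);   /* stands in for a DIRECTMAP/DMA page */
        int err;

        if (bounce == NULL)
            return -1;

        memcpy(bounce, payload, len);
        err = driver_start_io(bounce, len);
        free(bounce);                 /* the kernel frees on I/O completion */
        return err;
    }

    static int driver_start_io(const void *dma_safe_buf, size_t len)
    {
        (void)dma_safe_buf;
        (void)len;
        return 0;                     /* pretend the transfer succeeded */
    }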
+2016-09-21 Richard Braun <address@hidden>
+
+ Update Linux block layer glue code
+ The Linux block layer glue code needs to use page nodes with the
+ appropriate interface since their redefinition as struct list.
+
+ * linux/dev/glue/block.c: Include <kern/list.h>.
+ (struct temp_data): Define member `pages' as a struct list.
+ (alloc_buffer): Update to use list_xxx functions.
+ (free_buffer, INIT_DATA, device_open, device_read): Likewise.
+
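+ The switch to struct list means pages hang off an embedded node and are
+ walked with list helpers rather than the old pageq queue macros. A
+ self-contained approximation of that pattern (the list type and helpers
+ below are local to this example and only mimic kern/list.h):

    #include <stddef.h>
    #include <stdio.h>

    struct list {
        struct list *prev, *next;
    };

    static void list_init(struct list *l)          { l->prev = l->next = l; }

    static void list_insert_tail(struct list *l, struct list *n)
    {
        n->prev = l->prev; n->next = l;
        l->prev->next = n; l->prev = n;
    }

    struct toy_page {
        unsigned long phys_addr;
        struct list node;               /* link embedded in the page */
    };

    /* Recover the enclosing structure from a pointer to its list node. */
    #define structof(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct list pages;
        struct toy_page a = { 0x1000 }, b = { 0x2000 };
        struct list *n;

        list_init(&pages);
        list_insert_tail(&pages, &a.node);
        list_insert_tail(&pages, &b.node);

        /* Walk the list the way the glue code now iterates its pages. */
        for (n = pages.next; n != &pages; n = n->next)
            printf("page at %#lx\n",
                   structof(n, struct toy_page, node)->phys_addr);
        return 0;
    }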
+2016-09-21 Richard Braun <address@hidden>
+
+ Rework pageout to handle multiple segments
+ As we're about to use a new HIGHMEM segment, potentially much larger
+ than the existing DMA and DIRECTMAP ones, it's now compulsory to make
+ the pageout daemon aware of those segments.
+
+ And while we're at it, let's fix some of the defects that have been
+ plaguing pageout forever, such as throttling, and pageout of internal
+ versus external pages (this commit notably introduces a hardcoded
+ policy in which as many external pages as possible are selected before considering
+ internal pages).
+
+ * kern/slab.c (kmem_pagefree_physmem): Update call to vm_page_release.
+ * vm/vm_page.c: Include <kern/counters.h> and <vm/vm_pageout.h>.
+ (VM_PAGE_SEG_THRESHOLD_MIN_NUM, VM_PAGE_SEG_THRESHOLD_MIN_DENOM,
+ VM_PAGE_SEG_THRESHOLD_MIN, VM_PAGE_SEG_THRESHOLD_LOW_NUM,
+ VM_PAGE_SEG_THRESHOLD_LOW_DENOM, VM_PAGE_SEG_THRESHOLD_LOW,
+ VM_PAGE_SEG_THRESHOLD_HIGH_NUM, VM_PAGE_SEG_THRESHOLD_HIGH_DENOM,
+ VM_PAGE_SEG_THRESHOLD_HIGH, VM_PAGE_SEG_MIN_PAGES,
+ VM_PAGE_HIGH_ACTIVE_PAGE_NUM, VM_PAGE_HIGH_ACTIVE_PAGE_DENOM): New macros.
+ (struct vm_page_queue): New type.
+ (struct vm_page_seg): Add new members `min_free_pages', `low_free_pages',
+ `high_free_pages', `active_pages', `nr_active_pages', `high_active_pages',
+ `inactive_pages', `nr_inactive_pages'.
+ (vm_page_alloc_paused): New variable.
+ (vm_page_pageable, vm_page_can_move, vm_page_remove_mappings): New functions.
+ (vm_page_seg_alloc_from_buddy): Pause allocations and start the pageout
+ daemon as appropriate.
+ (vm_page_queue_init, vm_page_queue_push, vm_page_queue_remove,
+ vm_page_queue_first, vm_page_seg_get, vm_page_seg_index,
+ vm_page_seg_compute_pageout_thresholds): New functions.
+ (vm_page_seg_init): Initialize the new segment members.
+ (vm_page_seg_add_active_page, vm_page_seg_remove_active_page,
+ vm_page_seg_add_inactive_page, vm_page_seg_remove_inactive_page,
+ vm_page_seg_pull_active_page, vm_page_seg_pull_inactive_page,
+ vm_page_seg_pull_cache_page): New functions.
+ (vm_page_seg_min_page_available, vm_page_seg_page_available,
+ vm_page_seg_usable, vm_page_seg_double_lock, vm_page_seg_double_unlock,
+ vm_page_seg_balance_page, vm_page_seg_balance, vm_page_seg_evict,
+ vm_page_seg_compute_high_active_page, vm_page_seg_refill_inactive,
+ vm_page_lookup_seg, vm_page_check): New functions.
+ (vm_page_alloc_pa): Handle allocation failure from VM privileged thread.
+ (vm_page_info_all): Display additional segment properties.
+ (vm_page_wire, vm_page_unwire, vm_page_deactivate, vm_page_activate,
+ vm_page_wait): Move from vm/vm_resident.c and rewrite to use segments.
+ (vm_page_queues_remove, vm_page_check_usable, vm_page_may_balance,
+ vm_page_balance_once, vm_page_balance, vm_page_evict_once): New functions.
+ (VM_PAGE_MAX_LAUNDRY, VM_PAGE_MAX_EVICTIONS): New macros.
+ (vm_page_evict, vm_page_refill_inactive): New functions.
+ * vm/vm_page.h: Include <kern/list.h>.
+ (struct vm_page): Remove member `pageq', reuse the `node' member instead,
+ move the `listq' and `next' members above `vm_page_header'.
+ (VM_PAGE_CHECK): Define as an alias to vm_page_check.
+ (vm_page_check): New function declaration.
+ (vm_page_queue_fictitious, vm_page_queue_active, vm_page_queue_inactive,
+ vm_page_free_target, vm_page_free_min, vm_page_inactive_target,
+ vm_page_free_reserved, vm_page_free_wanted): Remove extern declarations.
+ (vm_page_external_pagedout): New extern declaration.
+ (vm_page_release): Update declaration.
+ (VM_PAGE_QUEUES_REMOVE): Define as an alias to vm_page_queues_remove.
+ (VM_PT_PMAP, VM_PT_KMEM, VM_PT_STACK): Remove macros.
+ (VM_PT_KERNEL): Update value.
+ (vm_page_queues_remove, vm_page_balance, vm_page_evict,
+ vm_page_refill_inactive): New function declarations.
+ * vm/vm_pageout.c (VM_PAGEOUT_BURST_MAX, VM_PAGEOUT_BURST_MIN,
+ VM_PAGEOUT_BURST_WAIT, VM_PAGEOUT_EMPTY_WAIT, VM_PAGEOUT_PAUSE_MAX,
+ VM_PAGE_INACTIVE_TARGET, VM_PAGE_FREE_TARGET, VM_PAGE_FREE_MIN,
+ VM_PAGE_FREE_RESERVED, VM_PAGEOUT_RESERVED_INTERNAL,
+ VM_PAGEOUT_RESERVED_REALLY): Remove macros.
+ (vm_pageout_reserved_internal, vm_pageout_reserved_really,
+ vm_pageout_burst_max, vm_pageout_burst_min, vm_pageout_burst_wait,
+ vm_pageout_empty_wait, vm_pageout_pause_count, vm_pageout_pause_max,
+ vm_pageout_active, vm_pageout_inactive, vm_pageout_inactive_nolock,
+ vm_pageout_inactive_busy, vm_pageout_inactive_absent,
+ vm_pageout_inactive_used, vm_pageout_inactive_clean,
+ vm_pageout_inactive_dirty, vm_pageout_inactive_double,
+ vm_pageout_inactive_cleaned_external): Remove variables.
+ (vm_pageout_requested, vm_pageout_continue): New variables.
+ (vm_pageout_setup): Wait for page allocation to succeed instead of
+ falling back to flush, update double paging protocol with caller,
+ add pageout throttling setup.
+ (vm_pageout_scan): Rewrite to use the new vm_page balancing,
+ eviction and inactive queue refill functions.
+ (vm_pageout_scan_continue, vm_pageout_continue): Remove functions.
+ (vm_pageout): Rewrite.
+ (vm_pageout_start, vm_pageout_resume): New functions.
+ * vm/vm_pageout.h (vm_pageout_continue, vm_pageout_scan_continue): Remove
+ function declarations.
+ (vm_pageout_start, vm_pageout_resume): New function declarations.
+ * vm/vm_resident.c: Include <kern/list.h>.
+ (vm_page_queue_fictitious): Define as a struct list.
+ (vm_page_free_wanted, vm_page_external_count, vm_page_free_avail,
+ vm_page_queue_active, vm_page_queue_inactive, vm_page_free_target,
+ vm_page_free_min, vm_page_inactive_target, vm_page_free_reserved):
+ Remove variables.
+ (vm_page_external_pagedout): New variable.
+ (vm_page_bootstrap): Don't initialize removed variable, update
+ initialization of vm_page_queue_fictitious.
+ (vm_page_replace): Call VM_PAGE_QUEUES_REMOVE where appropriate.
+ (vm_page_remove): Likewise.
+ (vm_page_grab_fictitious): Update to use list_xxx functions.
+ (vm_page_release_fictitious): Likewise.
+ (vm_page_grab): Remove pageout related code.
+ (vm_page_release): Add `laundry' and `external' parameters for
+ pageout throttling.
+ (vm_page_grab_contig): Remove pageout related code.
+ (vm_page_free_contig): Likewise.
+ (vm_page_free): Remove pageout related code, update call to
+ vm_page_release.
+ (vm_page_wait, vm_page_wire, vm_page_unwire, vm_page_deactivate,
+ vm_page_activate): Move to vm/vm_page.c.
+
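+ The hardcoded policy mentioned in this entry, selecting external pages
+ before internal ones, keeps pressure off the default pager. A simplified
+ sketch of such a two-pass victim selection (types and helper are
+ illustrative, not the actual vm_page_seg code):

    #include <stdbool.h>
    #include <stddef.h>

    struct candidate {
        bool external;      /* page belongs to an object backed by an
                               external (user-supplied) pager */
        bool reclaimed;
    };

    /* Two-pass selection: exhaust external candidates first, then fall
       back to internal (anonymous) pages handled by the default pager. */
    struct candidate *pick_victim(struct candidate *c, size_t n)
    {
        size_t i;

        for (i = 0; i < n; i++)
            if (!c[i].reclaimed && c[i].external)
                return &c[i];

        for (i = 0; i < n; i++)
            if (!c[i].reclaimed)
                return &c[i];

        return NULL;        /* nothing left to evict */
    }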
+2016-09-21 Richard Braun <address@hidden>
+
+ Redefine what an external page is
+ Instead of a "page considered external", which apparently takes into
+ account whether a page is dirty or not, redefine this property to
+ reliably mean "is in an external object".
+
+ This commit mostly deals with the impact of this change on the page
+ allocation interface.
+
+ * i386/intel/pmap.c (pmap_page_table_page_alloc): Update call to
+ vm_page_grab.
+ * kern/slab.c (kmem_pagealloc_physmem): Use vm_page_grab instead of
+ vm_page_grab_contig.
+ (kmem_pagefree_physmem): Use vm_page_release instead of
+ vm_page_free_contig.
+ * linux/dev/glue/block.c (alloc_buffer, device_read): Update call
+ to vm_page_grab.
+ * vm/vm_fault.c (vm_fault_page): Update calls to vm_page_grab and
+ vm_page_convert.
+ * vm/vm_map.c (vm_map_copy_steal_pages): Update call to vm_page_grab.
+ * vm/vm_page.h (struct vm_page): Remove `extcounted' member.
+ (vm_page_external_limit, vm_page_external_count): Remove extern
+ declarations.
+ (vm_page_convert, vm_page_grab): Update declarations.
+ (vm_page_release, vm_page_grab_phys_addr): New function declarations.
+ * vm/vm_pageout.c (VM_PAGE_EXTERNAL_LIMIT): Remove macro.
+ (VM_PAGE_EXTERNAL_TARGET): Likewise.
+ (vm_page_external_target): Remove variable.
+ (vm_pageout_scan): Remove specific handling of external pages.
+ (vm_pageout): Don't set vm_page_external_limit and
+ vm_page_external_target.
+ * vm/vm_resident.c (vm_page_external_limit): Remove variable.
+ (vm_page_insert, vm_page_replace, vm_page_remove): Update external
+ page tracking.
+ (vm_page_convert): Remove `external' parameter.
+ (vm_page_grab): Likewise. Remove specific handling of external pages.
+ (vm_page_grab_phys_addr): Update call to vm_page_grab.
+ (vm_page_release): Remove `external' parameter and remove specific
+ handling of external pages.
+ (vm_page_wait): Remove specific handling of external pages.
+ (vm_page_alloc): Update call to vm_page_grab.
+ (vm_page_free): Update call to vm_page_release.
+ * xen/block.c (device_read): Update call to vm_page_grab.
+ * xen/net.c (device_write): Likewise.
+
+2016-09-21 Richard Braun <address@hidden>
+
+ Replace vm_offset_t with phys_addr_t where appropriate
+ * i386/i386/phys.c (pmap_zero_page, pmap_copy_page, copy_to_phys,
+ copy_from_phys, kvtophys): Use the phys_addr_t type for physical
+ addresses.
+ * i386/intel/pmap.c (pmap_map, pmap_map_bd, pmap_destroy,
+ pmap_remove_range, pmap_page_protect, pmap_enter, pmap_extract,
+ pmap_collect, phys_attribute_clear, phys_attribute_test,
+ pmap_clear_modify, pmap_is_modified, pmap_clear_reference,
+ pmap_is_referenced): Likewise.
+ * i386/intel/pmap.h (pt_entry_t): Unconditionally define as a
+ phys_addr_t.
+ (pmap_zero_page, pmap_copy_page, kvtophys): Use the phys_addr_t
+ type for physical addresses.
+ * vm/pmap.h (pmap_enter, pmap_page_protect, pmap_clear_reference,
+ pmap_is_referenced, pmap_clear_modify, pmap_is_modified,
+ pmap_extract, pmap_map_bd): Likewise.
+ * vm/vm_page.h (vm_page_fictitious_addr): Declare as a phys_addr_t.
+ * vm/vm_resident.c (vm_page_fictitious_addr): Likewise.
+ (vm_page_grab_phys_addr): Change return type to phys_addr_t.
+
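+ The reason for the distinction: with PAE, an i386 physical address may not
+ fit in the 32-bit vm_offset_t used for virtual addresses. A small
+ stand-alone example of the truncation a wider phys_addr_t avoids (the
+ typedef widths below are illustrative stand-ins, not the kernel's types):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t toy_vm_offset_t;   /* virtual addresses stay 32-bit  */
    typedef uint64_t toy_phys_addr_t;   /* physical addresses may not fit */

    int main(void)
    {
        toy_phys_addr_t pa = (toy_phys_addr_t)5 << 30;   /* 5 GiB with PAE */
        toy_vm_offset_t lost = (toy_vm_offset_t)pa;      /* silently wraps */

        printf("phys %#llx stored in a vm_offset_t becomes %#x\n",
               (unsigned long long)pa, (unsigned)lost);
        return 0;
    }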
+2016-09-21 Richard Braun <address@hidden>
+
+ Remove phys_first_addr and phys_last_addr global variables
+ The old assumption that all physical memory is directly mapped in
+ kernel space is about to go away. Those variables are directly linked
+ to that assumption.
+
+ * i386/i386/model_dep.h (phys_first_addr): Remove extern declaration.
+ (phys_last_addr): Likewise.
+ * i386/i386/phys.c (pmap_zero_page): Use VM_PAGE_DIRECTMAP_LIMIT
+ instead of phys_last_addr.
+ (pmap_copy_page, copy_to_phys, copy_from_phys): Likewise.
+ * i386/i386/trap.c (user_trap): Remove check against phys_last_addr.
+ * i386/i386at/biosmem.c (biosmem_bootstrap_common): Don't set
+ phys_last_addr.
+ * i386/i386at/mem.c (memmmap): Use vm_page_lookup_pa to determine if
+ a physical address references physical memory.
+ * i386/i386at/model_dep.c (phys_first_addr): Remove variable.
+ (phys_last_addr): Likewise.
+ (pmap_free_pages, pmap_valid_page): Remove functions.
+ * i386/intel/pmap.c: Include i386at/biosmem.h.
+ (pa_index): Turn into an alias for vm_page_table_index.
+ (pmap_bootstrap): Replace uses of phys_first_addr and phys_last_addr
+ as appropriate.
+ (pmap_virtual_space): Use vm_page_table_size instead of phys_first_addr
+ and phys_last_addr to obtain the number of physical pages.
+ (pmap_verify_free): Remove function.
+ (valid_page): Turn this macro into an inline function and rewrite
+ using vm_page_lookup_pa.
+ (pmap_page_table_page_alloc): Build the pmap VM object using
+ vm_page_table_size to determine its size.
+ (pmap_remove_range, pmap_page_protect, phys_attribute_clear,
+ phys_attribute_test): Turn page indexes into unsigned long integers.
+ (pmap_enter): Likewise. In addition, use either vm_page_lookup_pa or
+ biosmem_directmap_end to determine if a physical address references
+ physical memory.
+ * i386/xen/xen.c (hyp_p2m_init): Use vm_page_table_size instead of
+ phys_last_addr to obtain the number of physical pages.
+ * kern/startup.c (phys_first_addr): Remove extern declaration.
+ (phys_last_addr): Likewise.
+ * linux/dev/init/main.c (linux_init): Use vm_page_seg_end with the
+ appropriate segment selector instead of phys_last_addr to determine
+ where high memory starts.
+ * vm/pmap.h: Update requirements description.
+ (pmap_free_pages, pmap_valid_page): Remove declarations.
+ * vm/vm_page.c (vm_page_seg_end, vm_page_boot_table_size,
+ vm_page_table_size, vm_page_table_index): New functions.
+ * vm/vm_page.h (vm_page_seg_end, vm_page_table_size,
+ vm_page_table_index): New function declarations.
+ * vm/vm_resident.c (vm_page_bucket_count, vm_page_hash_mask): Define
+ as unsigned long integers.
+ (vm_page_bootstrap): Compute VP table size based on the page table
+ size instead of the value returned by pmap_free_pages.
+
+2016-09-20 Richard Braun <address@hidden>
+
+ VM: remove commented out code
+ The vm_page_direct_va, vm_page_direct_pa and vm_page_direct_ptr
+ functions were imported along with the new vm_page module, but
+ never actually used since the kernel already has phystokv and
+ kvtophys functions.
+
+2016-09-16 Richard Braun <address@hidden>
+
+ VM: improve pageout deadlock workaround
+ Commit 5dd4f67522ad0d49a2cecdb9b109251f546d4dd1 makes VM map entry
+ allocation done with VM privilege, so that a VM map isn't held locked
+ while physical allocations are paused, which may block the default
+ pager during page eviction, causing a system-wide deadlock.
+
+ First, it turns out that map entries aren't the only buffers allocated,
+ and second, their number can't be easily determined, which makes a
+ preallocation strategy very hard to implement.
+
+ This change generalizes the strategy of VM privilege increase when a
+ VM map is locked.
+
+ * device/ds_routines.c (io_done_thread): Use integer values instead
+ of booleans when setting VM privilege.
+ * kern/thread.c (thread_init, thread_wire): Likewise.
+ * vm/vm_pageout.c (vm_pageout): Likewise.
+ * kern/thread.h (struct thread): Turn member `vm_privilege' into an
+ unsigned integer.
+ * vm/vm_map.c (vm_map_lock): New function, where VM privilege is
+ temporarily increased.
+ (vm_map_unlock): New function, where VM privilege is decreased.
+ (_vm_map_entry_create): Remove VM privilege workaround from this
+ function.
+ * vm/vm_map.h (vm_map_lock, vm_map_unlock): Turn into functions.
+
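+ In outline, the generalized workaround turns VM privilege into a per-thread
+ counter that is raised for the duration of a map lock, so allocations made
+ under the lock can draw from reserved pages instead of waiting on pageout.
+ A hedged sketch of that shape (field and function names are illustrative,
+ not the kernel's exact code):

    #include <stddef.h>

    struct toy_thread {
        unsigned int vm_privilege;   /* nesting count; was a boolean before */
    };

    struct toy_map {
        int write_locked;            /* stands in for the map's rw lock */
    };

    /* Raise VM privilege for the calling thread while it holds the map
       lock, so allocations made under the lock can use reserved pages. */
    void toy_map_lock(struct toy_map *map, struct toy_thread *self)
    {
        if (self != NULL)            /* early boot: no thread yet */
            self->vm_privilege++;
        map->write_locked = 1;       /* real code takes lock_write here */
    }

    void toy_map_unlock(struct toy_map *map, struct toy_thread *self)
    {
        map->write_locked = 0;       /* real code releases lock_write */
        if (self != NULL)
            self->vm_privilege--;
    }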
+2016-09-11 Samuel Thibault <address@hidden>
+
+ Fix spurious warning
+ * i386/i386/db_trace.c (db_i386_stack_trace): Do not check for frame
+ validity if it is 0.
+
+ Fix size of functions interrupt and syscall
+ * i386/i386/locore.S (syscall): Add END(syscall).
+ * i386/i386at/interrupt.S (interrupt): Add END(interrupt).
+
+ Set function type on symbols created by ENTRY macro
+ * i386/include/mach/i386/asm.h (ENTRY, ENTRY2, ASENTRY, Entry): Use .type
+ @function on created entries.
+
+ Close parenthesis
+ * i386/i386/db_trace.c (db_i386_stack_trace): When stopping on zero frame,
+ close parameters parenthesis.
+
+ Fix exploring stack trace up to assembly
+ * i386/i386/db_trace.c (db_i386_stack_trace): Do not stop as soon as frame
+ is 0, lookup PC first, and stop only before accessing the frame content.
+
+2016-09-11 Justus Winter <address@hidden>
+
+ ipc: Fix crash in debug code.
+ * ipc/mach_debug.c (mach_port_kernel_object): Check that the receiver
+ is valid.
+
+2016-09-07 Richard Braun <address@hidden>
+
+ Remove map entry pageability property.
+ Since the replacement of the zone allocator, kernel objects have been
+ wired in memory. Besides, as of 5e9f6f (Stack the slab allocator
+ directly on top of the physical allocator), there is a single cache
+ used to allocate map entries.
+
+ Those changes make the pageability attribute of VM maps irrelevant.
+
+ * device/ds_routines.c (mach_device_init): Update call to kmem_submap.
+ * ipc/ipc_init.c (ipc_init): Likewise.
+ * kern/task.c (task_create): Update call to vm_map_create.
+ * vm/vm_kern.c (kmem_submap): Remove `pageable' argument. Update call
+ to vm_map_setup.
+ (kmem_init): Update call to vm_map_setup.
+ * vm/vm_kern.h (kmem_submap): Update declaration.
+ * vm/vm_map.c (vm_map_setup): Remove `pageable' argument. Don't set
+ `entries_pageable' member.
+ (vm_map_create): Likewise.
+ (vm_map_copyout): Don't bother creating copies of page entries with
+ the right pageability.
+ (vm_map_copyin): Don't set `entries_pageable' member.
+ (vm_map_fork): Update call to vm_map_create.
+ * vm/vm_map.h (struct vm_map_header): Remove `entries_pageable' member.
+ (vm_map_setup, vm_map_create): Remove `pageable' argument.
+
+2016-09-06 Richard Braun <address@hidden>
+
+ Fix registration of strings in boot data
+ * i386/i386at/model_dep.c (register_boot_data): Use phystokv on strings
+ when computing their length.
+
+2016-09-06 Richard Braun <address@hidden>
+
+ Make early physical page allocation truly reliable
+ Import upstream biosmem changes and adjust for local modifications.
+
+ Specifically, this change makes the biosmem module reliably track all
+ boot data by storing their addresses in a sorted array. This allows
+ both the early page allocator and the biosmem_free_usable function
+ to accurately find any range of free pages.
+
+ * i386/i386at/biosmem.c: Remove inclusion of <i386at/elf.h>.
+ (_start, _end): Remove variable declarations.
+ (BIOSMEM_MAX_BOOT_DATA): New macro.
+ (struct biosmem_boot_data): New type.
+ (biosmem_boot_data_array, biosmem_nr_boot_data): New variables.
+ (biosmem_heap_start, biosmem_heap_bottom, biosmem_heap_top,
+ biosmem_heap_end): Change type to phys_addr_t.
+ (biosmem_panic_inval_boot_data): New variable.
+ (biosmem_panic_too_many_boot_data): Likewise.
+ (biosmem_panic_toobig_msg): Variable renamed ...
+ (biosmem_panic_too_big_msg): ... to this.
+ (biosmem_register_boot_data): New function.
+ (biosmem_unregister_boot_data): Likewise.
+ (biosmem_map_adjust): Update reference to panic message.
+ (biosmem_map_find_avail): Add detailed description.
+ (biosmem_save_cmdline_sizes): Remove function.
+ (biosmem_find_heap_clip): Likewise.
+ (biosmem_find_heap): Likewise.
+ (biosmem_find_avail_clip, biosmem_find_avail): New functions.
+ (biosmem_setup_allocator): Receive const multiboot info, replace
+ calls to biosmem_find_heap with calls to biosmem_find_avail and
+ update accordingly. Register the heap as boot data.
+ (biosmem_xen_bootstrap): Register the Xen boot info and the heap as
+ boot data.
+ (biosmem_bootstrap): Receive const multiboot information. Remove call
+ to biosmem_save_cmdline_sizes.
+ (biosmem_bootalloc): Remove assertion on the VM system state.
+ (biosmem_type_desc, biosmem_map_show): Build only if DEBUG is true.
+ (biosmem_unregister_temporary_boot_data): New function.
+ (biosmem_free_usable_range): Change address range format.
+ (biosmem_free_usable_entry): Rewrite to use biosmem_find_avail
+ without abusing it.
+ (biosmem_free_usable): Call biosmem_unregister_temporary_boot_data,
+ update call to biosmem_free_usable_entry.
+ * i386/i386at/biosmem.h (biosmem_register_boot_data): New function.
+ (biosmem_bootalloc): Update description.
+ (biosmem_bootstrap): Update description and declaration.
+ (biosmem_free_usable): Likewise.
+ * i386/i386at/model_dep.c: Include <i386at/elf.h>.
+ (machine_init): Update call to biosmem_free_usable.
+ (register_boot_data): New function.
+ (i386at_init): Call register_boot_data where appropriate.
+
+2016-09-03 Richard Braun <address@hidden>
+
+ Fix early physical page allocation
+ Import upstream biosmem and vm_page changes, and adjust for local
+ modifications.
+
+ Specifically, the biosmem module was mistakenly loading physical
+ segments that did not clip with the heap as completely available.
+ This change makes it load them as completely unavailable during
+ startup, and once the VM system is ready, additional pages are
+ loaded.
+
+ * i386/i386at/biosmem.c (DEBUG): New macro.
+ (struct biosmem_segment): Remove members `avail_start' and `avail_end'.
+ (biosmem_heap_cur): Remove variable.
+ (biosmem_heap_bottom, biosmem_heap_top, biosmem_heap_topdown): New variables.
+ (biosmem_find_boot_data_update, biosmem_find_boot_data): Remove functions.
+ (biosmem_find_heap_clip, biosmem_find_heap): New functions.
+ (biosmem_setup_allocator): Rewritten to use the new biosmem_find_heap
+ function.
+ (biosmem_bootalloc): Support both bottom-up and top-down allocations.
+ (biosmem_directmap_size): Renamed to ...
+ (biosmem_directmap_end): ... this function.
+ (biosmem_load_segment): Fix segment loading.
+ (biosmem_setup): Restrict usable memory to the directmap segment.
+ (biosmem_free_usable_range): Add checks on input parameters.
+ (biosmem_free_usable_update_start, biosmem_free_usable_start,
+ biosmem_free_usable_reserved, biosmem_free_usable_end): Remove functions.
+ (biosmem_free_usable_entry): Rewritten to use the new biosmem_find_heap
+ function.
+ (biosmem_free_usable): Restrict usable memory to the directmap segment.
+ * i386/i386at/biosmem.h (biosmem_bootalloc): Update description.
+ (biosmem_directmap_size): Renamed to ...
+ (biosmem_directmap_end): ... this function.
+ (biosmem_free_usable): Update declaration.
+ * i386/i386at/model_dep.c (machine_init): Call biosmem_free_usable.
+ * vm/vm_page.c (DEBUG): New macro.
+ (struct vm_page_seg): New member `heap_present'.
+ (vm_page_load): Remove heap related parameters.
+ (vm_page_load_heap): New function.
+ * vm/vm_page.h (vm_page_load): Remove heap related parameters. Update
+ description.
+ (vm_page_load_heap): New function.
+
+2016-09-01 Richard Braun <address@hidden>
+
+ pmap: fix map window creation on xen
+ * i386/intel/pmap.c (pmap_get_mapwindow, pmap_put_mapwindow): Use
+ the appropriate xen hypercall if building for paravirtualized page
+ table management.
+
+2016-08-31 Samuel Thibault <address@hidden>
+
+ Avoid using non-ascii source encoding
+ * xen/console.c (hypcnintr): Replace latin1 £ character with the 0xA3
+ number.
+
+2016-08-29 Richard Braun <address@hidden>
+
+ vm: fix boot on xen
+ * vm/vm_map.c (_vm_map_entry_create): Make sure there is a thread
+ before accessing VM privilege.
+
+2016-08-26 Samuel Thibault <address@hidden>
+
+ Revert "Fix documentation for vm_map"
+ This reverts commit 57694037a02dda29bd678dc3b8531bd437682ba7.
+
+ We rather prefer the kernel just use whatever slot it sees fit. Userland has
+ already been fixed into not using the behavior anyway.
+
+2016-08-25 Samuel Thibault <address@hidden>
+
+ Add missing memory barriers in simple lock debugging
+ * kern/lock.c (_simple_lock, _simple_lock_try, simple_unlock): Add compiler
+ memory barrier to separate simple_locks_taken update from information
+ filling.
+
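+ A compiler memory barrier of the kind added here is typically an empty asm
+ statement with a "memory" clobber: it stops the compiler from reordering
+ loads and stores across it without emitting any instruction. A sketch of
+ one common use of such a barrier, publishing a debug record only after it
+ is filled in (names are illustrative; the actual kern/lock.c code differs
+ in detail):

    /* Compiler-only barrier: forbids reordering of memory accesses across
       this point at compile time, without emitting a fence instruction. */
    #define barrier()  __asm__ __volatile__("" : : : "memory")

    struct lock_info {
        const void *lock;
        const void *caller;
    };

    #define MAX_INFO 64

    static struct lock_info locks_info[MAX_INFO];
    static unsigned int locks_taken;

    /* Publish the counter only once the entry it covers is filled in, so a
       debugger sampling the array never sees an uninitialized record. */
    void record_lock(const void *lock, const void *caller)
    {
        if (locks_taken >= MAX_INFO)
            return;

        locks_info[locks_taken].lock = lock;
        locks_info[locks_taken].caller = caller;
        barrier();
        locks_taken++;
    }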
+ Use invlpg for single-page pagetable changes
+ * i386/intel/pmap.c (INVALIDATE_TLB): When e-s is constant, equal to
+ PAGE_SIZE, use just one invlpg instruction to flush the TLB.
+
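+ The single-page case above can be handled with one invlpg instruction,
+ while larger ranges still fall back to a full flush by reloading CR3. A
+ simplified i386, ring-0 sketch of that choice (condensed from the idea
+ behind INVALIDATE_TLB, not the macro itself):

    #define PAGE_SIZE 4096UL

    /* Invalidate the single TLB entry covering va (privileged instruction). */
    static inline void flush_one_page(unsigned long va)
    {
        __asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
    }

    /* Full TLB flush by reloading CR3 (privileged as well). */
    static inline void flush_all(void)
    {
        unsigned long cr3;

        __asm__ __volatile__("movl %%cr3, %0; movl %0, %%cr3"
                             : "=r" (cr3) : : "memory");
    }

    /* Pick the cheap path when the range is known to be exactly one page. */
    static inline void invalidate_tlb(unsigned long start, unsigned long end)
    {
        if (end - start == PAGE_SIZE)
            flush_one_page(start);
        else
            flush_all();
    }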
+ Drop unused macro
+ * i386/intel/pmap.c (MAX_TBIS_SIZE): Drop unused macro.
+
+2016-08-16 Richard Braun <address@hidden>
+
+ Replace libc string functions with internal implementations
+ * Makefile.am (clib_routines): Remove memcmp, memcpy, memmove,
+ strchr, strstr and strsep.
+ * kern/strings.c (memset): Comment out.
+ (strchr, strsep, strstr): New functions.
+
+ i386: import string functions from X15 and relicense to GPLv2+
+ * i386/Makefrag.am (libkernel_a_SOURCES): Add i386/i386/strings.c.
+ * i386/i386/strings.c: New file.
+
+2016-08-12 Richard Braun <address@hidden>
+
+ i386: request the boot loader to page-align modules
+ * i386/i386at/boothdr.S (MULTIBOOT_FLAGS): Set LSB bit.
+
2016-08-07 Richard Braun <address@hidden>
VM: fix pageout-related deadlock
diff --git a/Makefile.am b/Makefile.am
index bbcfc11..50ff6b6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -161,9 +161,7 @@ noinst_PROGRAMS += \
gnumach.o
# This is the list of routines we decide is OK to steal from the C library.
-clib_routines := memcmp memcpy memmove \
- strchr strstr strsep strtok \
- htonl htons ntohl ntohs \
+clib_routines := htonl htons ntohl ntohs \
udivdi3 __udivdi3 __umoddi3 \
__rel_iplt_start __rel_iplt_end \
__ffsdi2 \
diff --git a/Makefile.in b/Makefile.in
index f38eef3..cf0fa1b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1076,6 +1076,7 @@ noinst_PROGRAMS = gnumach.o$(EXEEXT)
@HOST_ix86_TRUE@ i386/i386/setjmp.h \
@HOST_ix86_TRUE@ i386/i386/spl.S \
@HOST_ix86_TRUE@ i386/i386/spl.h \
+@HOST_ix86_TRUE@ i386/i386/strings.c \
@HOST_ix86_TRUE@ i386/i386/task.h \
@HOST_ix86_TRUE@ i386/i386/thread.h \
@HOST_ix86_TRUE@ i386/i386/time_stamp.h \
@@ -1353,16 +1354,17 @@ am__libkernel_a_SOURCES_DIST = ddb/db_access.c ddb/db_access.h \
i386/i386/pio.h i386/i386/pmap.h i386/i386/proc_reg.h \
i386/i386/sched_param.h i386/i386/seg.c i386/i386/seg.h \
i386/i386/setjmp.h i386/i386/spl.S i386/i386/spl.h \
- i386/i386/task.h i386/i386/thread.h i386/i386/time_stamp.h \
- i386/i386/trap.c i386/i386/trap.h i386/i386/tss.h \
- i386/i386/user_ldt.c i386/i386/user_ldt.h i386/i386/vm_param.h \
- i386/i386/xpr.h i386/intel/pmap.c i386/intel/pmap.h \
- i386/intel/read_fault.c i386/intel/read_fault.h \
- i386/i386/hardclock.c i386/i386/hardclock.h i386/i386/io_map.c \
- i386/i386/pic.c i386/i386/pic.h i386/i386/pit.c \
- i386/i386/pit.h i386/i386/_setjmp.S chips/busses.c \
- chips/busses.h device/cirbuf.c i386/xen/xen.c \
- i386/xen/xen_locore.S i386/xen/xen_boothdr.S i386/i386/xen.h
+ i386/i386/strings.c i386/i386/task.h i386/i386/thread.h \
+ i386/i386/time_stamp.h i386/i386/trap.c i386/i386/trap.h \
+ i386/i386/tss.h i386/i386/user_ldt.c i386/i386/user_ldt.h \
+ i386/i386/vm_param.h i386/i386/xpr.h i386/intel/pmap.c \
+ i386/intel/pmap.h i386/intel/read_fault.c \
+ i386/intel/read_fault.h i386/i386/hardclock.c \
+ i386/i386/hardclock.h i386/i386/io_map.c i386/i386/pic.c \
+ i386/i386/pic.h i386/i386/pit.c i386/i386/pit.h \
+ i386/i386/_setjmp.S chips/busses.c chips/busses.h \
+ device/cirbuf.c i386/xen/xen.c i386/xen/xen_locore.S \
+ i386/xen/xen_boothdr.S i386/i386/xen.h
@address@hidden = ddb/db_access.$(OBJEXT) \
@enable_kdb_TRUE@ ddb/db_aout.$(OBJEXT) ddb/db_elf.$(OBJEXT) \
@enable_kdb_TRUE@ ddb/db_break.$(OBJEXT) \
@@ -1433,6 +1435,7 @@ am__libkernel_a_SOURCES_DIST = ddb/db_access.c ddb/db_access.h \
@HOST_ix86_TRUE@ i386/i386/phys.$(OBJEXT) \
@HOST_ix86_TRUE@ i386/i386/seg.$(OBJEXT) \
@HOST_ix86_TRUE@ i386/i386/spl.$(OBJEXT) \
+@HOST_ix86_TRUE@ i386/i386/strings.$(OBJEXT) \
@HOST_ix86_TRUE@ i386/i386/trap.$(OBJEXT) \
@HOST_ix86_TRUE@ i386/i386/user_ldt.$(OBJEXT) \
@HOST_ix86_TRUE@ i386/intel/pmap.$(OBJEXT) \
@@ -2941,9 +2944,7 @@ gnumach_o_SOURCES =
gnumach_o_LINK = $(LD) -u _start -r -o $@
# This is the list of routines we decide is OK to steal from the C library.
-clib_routines := memcmp memcpy memmove \
- strchr strstr strsep strtok \
- htonl htons ntohl ntohs \
+clib_routines := htonl htons ntohl ntohs \
udivdi3 __udivdi3 __umoddi3 \
__rel_iplt_start __rel_iplt_end \
__ffsdi2 \
@@ -3422,6 +3423,8 @@ i386/i386/seg.$(OBJEXT): i386/i386/$(am__dirstamp) \
i386/i386/$(DEPDIR)/$(am__dirstamp)
i386/i386/spl.$(OBJEXT): i386/i386/$(am__dirstamp) \
i386/i386/$(DEPDIR)/$(am__dirstamp)
+i386/i386/strings.$(OBJEXT): i386/i386/$(am__dirstamp) \
+ i386/i386/$(DEPDIR)/$(am__dirstamp)
i386/i386/trap.$(OBJEXT): i386/i386/$(am__dirstamp) \
i386/i386/$(DEPDIR)/$(am__dirstamp)
i386/i386/user_ldt.$(OBJEXT): i386/i386/$(am__dirstamp) \
@@ -4242,6 +4245,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
address@hidden@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/i386/$(DEPDIR)/address@hidden@
@AMDEP_TRUE@@am__include@ @address@hidden/i386at/$(DEPDIR)/address@hidden@
diff --git a/configure b/configure
index b529a51..282252c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for GNU Mach 1.7+git20160809.
+# Generated by GNU Autoconf 2.69 for GNU Mach 1.7+git20160921.
#
# Report bugs to <address@hidden>.
#
@@ -579,8 +579,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='GNU Mach'
PACKAGE_TARNAME='gnumach'
-PACKAGE_VERSION='1.7+git20160809'
-PACKAGE_STRING='GNU Mach 1.7+git20160809'
+PACKAGE_VERSION='1.7+git20160921'
+PACKAGE_STRING='GNU Mach 1.7+git20160921'
PACKAGE_BUGREPORT='address@hidden'
PACKAGE_URL=''
@@ -1599,7 +1599,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures GNU Mach 1.7+git20160809 to adapt to many kinds of systems.
+\`configure' configures GNU Mach 1.7+git20160921 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1670,7 +1670,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of GNU Mach 1.7+git20160809:";;
+ short | recursive ) echo "Configuration of GNU Mach 1.7+git20160921:";;
esac
cat <<\_ACEOF
@@ -2026,7 +2026,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-GNU Mach configure 1.7+git20160809
+GNU Mach configure 1.7+git20160921
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2118,7 +2118,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by GNU Mach $as_me 1.7+git20160809, which was
+It was created by GNU Mach $as_me 1.7+git20160921, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2984,7 +2984,7 @@ fi
# Define the identity of the package.
PACKAGE='gnumach'
- VERSION='1.7+git20160809'
+ VERSION='1.7+git20160921'
# Some tools Automake needs.
@@ -12189,7 +12189,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by GNU Mach $as_me 1.7+git20160809, which was
+This file was extended by GNU Mach $as_me 1.7+git20160921, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -12260,7 +12260,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-GNU Mach config.status 1.7+git20160809
+GNU Mach config.status 1.7+git20160921
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/device/ds_routines.c b/device/ds_routines.c
index 6b6dcb0..1fabec3 100644
--- a/device/ds_routines.c
+++ b/device/ds_routines.c
@@ -1512,7 +1512,7 @@ void io_done_thread(void)
/*
* Set thread privileges and highest priority.
*/
- current_thread()->vm_privilege = TRUE;
+ current_thread()->vm_privilege = 1;
stack_privilege(current_thread());
thread_set_own_priority(0);
@@ -1532,7 +1532,7 @@ void mach_device_init(void)
simple_lock_init(&io_done_list_lock);
kmem_submap(device_io_map, kernel_map, &device_io_min, &device_io_max,
- DEVICE_IO_MAP_SIZE, FALSE);
+ DEVICE_IO_MAP_SIZE);
/*
* If the kernel receives many device_write requests, the
diff --git a/doc/mach.info b/doc/mach.info
index a728886..dc3faa6 100644
--- a/doc/mach.info
+++ b/doc/mach.info
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
This file documents the GNU Mach microkernel.
- This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+ This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
Inc.
@@ -39,126 +39,126 @@ END-INFO-DIR-ENTRY
Indirect:
-mach.info-1: 1639
-mach.info-2: 302600
+mach.info-1: 1640
+mach.info-2: 302586
Tag Table:
(Indirect)
-Node: Top1639
-Node: Introduction11276
-Node: Audience12107
-Node: Features13142
-Node: Overview14969
-Node: History16162
-Node: Installing16307
-Node: Binary Distributions17532
-Node: Compilation18340
-Node: Configuration19573
-Node: Cross-Compilation35984
-Node: Bootstrap36765
-Ref: Bootstrap-Footnote-137208
-Node: Bootloader37445
-Ref: Bootloader-Footnote-138725
-Node: Modules38811
-Node: Inter Process Communication39638
-Node: Major Concepts40261
-Node: Messaging Interface44066
-Node: Mach Message Call44796
-Node: Message Format48111
-Node: Exchanging Port Rights59303
-Ref: Exchanging Port Rights-Footnote-164865
-Node: Memory65037
-Ref: Memory-Footnote-168131
-Node: Message Send68473
-Ref: Message Send-Footnote-175495
-Node: Message Receive75778
-Ref: Message Receive-Footnote-185430
-Node: Atomicity85711
-Node: Port Manipulation Interface88485
-Node: Port Creation90040
-Node: Port Destruction94829
-Node: Port Names97972
-Node: Port Rights102219
-Node: Ports and other Tasks106008
-Node: Receive Rights110101
-Node: Port Sets117032
-Node: Request Notifications119435
-Node: Inherited Ports124239
-Node: Virtual Memory Interface127923
-Node: Memory Allocation129176
-Node: Memory Deallocation131701
-Node: Data Transfer133165
-Node: Memory Attributes136691
-Node: Mapping Memory Objects146130
-Node: Memory Statistics149439
-Node: External Memory Management151013
-Node: Memory Object Server151718
-Node: Memory Object Creation154427
-Node: Memory Object Termination158475
-Node: Memory Objects and Data161414
-Node: Memory Object Locking178560
-Node: Memory Object Attributes184455
-Node: Default Memory Manager190292
-Node: Threads and Tasks196014
-Node: Thread Interface196351
-Node: Thread Creation197347
-Node: Thread Termination198464
-Node: Thread Information198935
-Node: Thread Settings205034
-Node: Thread Execution206268
-Node: Scheduling213561
-Node: Thread Priority213916
-Node: Hand-Off Scheduling216550
-Node: Scheduling Policy221675
-Node: Thread Special Ports223007
-Node: Exceptions225453
-Node: Task Interface226323
-Node: Task Creation227335
-Node: Task Termination228670
-Node: Task Information229272
-Node: Task Execution236174
-Node: Task Special Ports240587
-Node: Syscall Emulation244441
-Node: Profiling245672
-Node: Host Interface249435
-Node: Host Ports250420
-Node: Host Information252493
-Node: Host Time257876
-Node: Host Reboot260543
-Node: Processors and Processor Sets261095
-Node: Processor Set Interface262073
-Node: Processor Set Ports262840
-Node: Processor Set Access263670
-Node: Processor Set Creation265930
-Node: Processor Set Destruction266957
-Node: Tasks and Threads on Sets267878
-Node: Processor Set Priority273045
-Node: Processor Set Policy274335
-Node: Processor Set Info275949
-Node: Processor Interface279762
-Node: Hosted Processors280487
-Node: Processor Control281478
-Node: Processors and Sets282944
-Node: Processor Info284822
-Node: Device Interface287564
-Node: Device Reply Server289179
-Node: Device Open290471
-Node: Device Close292594
-Node: Device Read293173
-Node: Device Write296092
-Node: Device Map298897
-Node: Device Status299788
-Node: Device Filter302600
-Node: Kernel Debugger308347
-Node: Operation309074
-Node: Commands312051
-Node: Variables325836
-Node: Expressions327224
-Node: Copying328573
-Node: Documentation License347802
-Node: GNU Free Documentation License348391
-Node: CMU License370790
-Node: Concept Index372025
-Node: Function and Data Index375871
+Node: Top1640
+Node: Introduction11278
+Node: Audience12109
+Node: Features13144
+Node: Overview14971
+Node: History16164
+Node: Installing16309
+Node: Binary Distributions17534
+Node: Compilation18342
+Node: Configuration19575
+Node: Cross-Compilation35986
+Node: Bootstrap36767
+Ref: Bootstrap-Footnote-137210
+Node: Bootloader37447
+Ref: Bootloader-Footnote-138727
+Node: Modules38813
+Node: Inter Process Communication39640
+Node: Major Concepts40263
+Node: Messaging Interface44068
+Node: Mach Message Call44798
+Node: Message Format48113
+Node: Exchanging Port Rights59305
+Ref: Exchanging Port Rights-Footnote-164867
+Node: Memory65039
+Ref: Memory-Footnote-168133
+Node: Message Send68475
+Ref: Message Send-Footnote-175497
+Node: Message Receive75780
+Ref: Message Receive-Footnote-185432
+Node: Atomicity85713
+Node: Port Manipulation Interface88487
+Node: Port Creation90042
+Node: Port Destruction94831
+Node: Port Names97974
+Node: Port Rights102221
+Node: Ports and other Tasks106010
+Node: Receive Rights110103
+Node: Port Sets117034
+Node: Request Notifications119437
+Node: Inherited Ports124241
+Node: Virtual Memory Interface127925
+Node: Memory Allocation129178
+Node: Memory Deallocation131703
+Node: Data Transfer133167
+Node: Memory Attributes136693
+Node: Mapping Memory Objects146132
+Node: Memory Statistics149424
+Node: External Memory Management150998
+Node: Memory Object Server151703
+Node: Memory Object Creation154412
+Node: Memory Object Termination158460
+Node: Memory Objects and Data161399
+Node: Memory Object Locking178545
+Node: Memory Object Attributes184440
+Node: Default Memory Manager190277
+Node: Threads and Tasks195999
+Node: Thread Interface196336
+Node: Thread Creation197332
+Node: Thread Termination198449
+Node: Thread Information198920
+Node: Thread Settings205019
+Node: Thread Execution206253
+Node: Scheduling213546
+Node: Thread Priority213901
+Node: Hand-Off Scheduling216535
+Node: Scheduling Policy221660
+Node: Thread Special Ports222992
+Node: Exceptions225438
+Node: Task Interface226308
+Node: Task Creation227320
+Node: Task Termination228655
+Node: Task Information229257
+Node: Task Execution236159
+Node: Task Special Ports240572
+Node: Syscall Emulation244426
+Node: Profiling245657
+Node: Host Interface249420
+Node: Host Ports250405
+Node: Host Information252478
+Node: Host Time257861
+Node: Host Reboot260528
+Node: Processors and Processor Sets261080
+Node: Processor Set Interface262058
+Node: Processor Set Ports262825
+Node: Processor Set Access263655
+Node: Processor Set Creation265915
+Node: Processor Set Destruction266942
+Node: Tasks and Threads on Sets267863
+Node: Processor Set Priority273030
+Node: Processor Set Policy274320
+Node: Processor Set Info275934
+Node: Processor Interface279747
+Node: Hosted Processors280472
+Node: Processor Control281463
+Node: Processors and Sets282929
+Node: Processor Info284807
+Node: Device Interface287549
+Node: Device Reply Server289164
+Node: Device Open290456
+Node: Device Close292579
+Node: Device Read293158
+Node: Device Write296077
+Node: Device Map298882
+Node: Device Status299773
+Node: Device Filter302586
+Node: Kernel Debugger308333
+Node: Operation309060
+Node: Commands312037
+Node: Variables325822
+Node: Expressions327210
+Node: Copying328559
+Node: Documentation License347788
+Node: GNU Free Documentation License348377
+Node: CMU License370776
+Node: Concept Index372011
+Node: Function and Data Index375857
End Tag Table
diff --git a/doc/mach.info-1 b/doc/mach.info-1
index aabe01d..32bc7c3 100644
--- a/doc/mach.info-1
+++ b/doc/mach.info-1
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
This file documents the GNU Mach microkernel.
- This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+ This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
Inc.
@@ -45,8 +45,8 @@ Main Menu
This file documents the GNU Mach microkernel.
- This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+ This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
Inc.
@@ -3347,14 +3347,14 @@ File: mach.info, Node: Mapping Memory Objects, Next: Memory Statistics, Prev:
memory exception.
TARGET_TASK is the task to be affected. The starting address is
- ADDRESS. If the ANYWHERE option is used, this address is used as a
- starting hint. The address actually allocated will be returned in
- ADDRESS. SIZE is the number of bytes to allocate (rounded by the
- system in a machine dependent way). The alignment restriction is
- specified by MASK. Bits asserted in this mask must not be asserted
- in the address returned. If ANYWHERE is set, the kernel should
- find and allocate any region of the specified size, and return the
- address of the resulting region in ADDRESS.
+ ADDRESS. If the ANYWHERE option is used, this address is ignored.
+ The address actually allocated will be returned in ADDRESS. SIZE
+ is the number of bytes to allocate (rounded by the system in a
+ machine dependent way). The alignment restriction is specified by
+ MASK. Bits asserted in this mask must not be asserted in the
+ address returned. If ANYWHERE is set, the kernel should find and
+ allocate any region of the specified size, and return the address
+ of the resulting region in ADDRESS.
MEMORY_OBJECT is the port that represents the memory object: used
by user tasks in 'vm_map'; used by the make requests for data or
diff --git a/doc/mach.info-2 b/doc/mach.info-2
index 847cb6d..78d8dd1 100644
--- a/doc/mach.info-2
+++ b/doc/mach.info-2
@@ -1,9 +1,9 @@
-This is mach.info, produced by makeinfo version 6.1 from mach.texi.
+This is mach.info, produced by makeinfo version 6.3 from mach.texi.
This file documents the GNU Mach microkernel.
- This is edition 0.4, last updated on 20 April 2016, of 'The GNU Mach
-Reference Manual', for version 1.7+git20160809.
+ This is edition 0.4, last updated on 31 August 2016, of 'The GNU Mach
+Reference Manual', for version 1.7+git20160921.
Copyright (C) 2001, 2002, 2006, 2007, 2008 Free Software Foundation,
Inc.
diff --git a/doc/mach.texi b/doc/mach.texi
index 0aeed76..98f72fa 100644
--- a/doc/mach.texi
+++ b/doc/mach.texi
@@ -3367,7 +3367,7 @@ exception.
@var{target_task} is the task to be affected. The starting address is
@var{address}. If the @var{anywhere} option is used, this address is
-used as a starting hint. The address actually allocated will be returned in
+ignored. The address actually allocated will be returned in
@var{address}. @var{size} is the number of bytes to allocate (rounded by
the system in a machine dependent way). The alignment restriction is
specified by @var{mask}. Bits asserted in this mask must not be
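For reference, an untested user-space sketch of a vm_map call with anywhere
set, following the argument order documented in the section this hunk
patches (using MACH_PORT_NULL as the memory object to obtain zero-filled
memory is an assumption based on the manual):

    #include <mach.h>
    #include <stdio.h>

    int main(void)
    {
        vm_address_t addr = 0;   /* ignored because anywhere == TRUE */
        vm_size_t size = 2 * vm_page_size;
        kern_return_t kr;

        kr = vm_map(mach_task_self(), &addr, size,
                    0,                  /* mask: no alignment restriction */
                    TRUE,               /* anywhere: kernel picks the spot */
                    MACH_PORT_NULL,     /* memory object: zero-filled memory */
                    0,                  /* offset */
                    FALSE,              /* copy */
                    VM_PROT_READ | VM_PROT_WRITE,
                    VM_PROT_READ | VM_PROT_WRITE,
                    VM_INHERIT_NONE);
        if (kr != KERN_SUCCESS) {
            fprintf(stderr, "vm_map failed: %d\n", kr);
            return 1;
        }

        printf("mapped %lu bytes at %#lx\n",
               (unsigned long)size, (unsigned long)addr);
        return 0;
    }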
diff --git a/doc/stamp-vti b/doc/stamp-vti
index 5bfda11..165f9b3 100644
--- a/doc/stamp-vti
+++ b/doc/stamp-vti
@@ -1,4 +1,4 @@
-@set UPDATED 20 April 2016
-@set UPDATED-MONTH April 2016
-@set EDITION 1.7+git20160809
-@set VERSION 1.7+git20160809
+@set UPDATED 31 August 2016
+@set UPDATED-MONTH August 2016
+@set EDITION 1.7+git20160921
+@set VERSION 1.7+git20160921
diff --git a/doc/version.texi b/doc/version.texi
index 5bfda11..165f9b3 100644
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,4 +1,4 @@
-@set UPDATED 20 April 2016
-@set UPDATED-MONTH April 2016
-@set EDITION 1.7+git20160809
-@set VERSION 1.7+git20160809
+@set UPDATED 31 August 2016
+@set UPDATED-MONTH August 2016
+@set EDITION 1.7+git20160921
+@set VERSION 1.7+git20160921
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index c61a3f6..90f20fb 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -151,6 +151,7 @@ libkernel_a_SOURCES += \
i386/i386/setjmp.h \
i386/i386/spl.S \
i386/i386/spl.h \
+ i386/i386/strings.c \
i386/i386/task.h \
i386/i386/thread.h \
i386/i386/time_stamp.h \
diff --git a/i386/i386/db_trace.c b/i386/i386/db_trace.c
index c8789e7..898feba 100644
--- a/i386/i386/db_trace.c
+++ b/i386/i386/db_trace.c
@@ -431,7 +431,7 @@ db_i386_stack_trace(
}
lastframe = 0;
- while (count-- && frame != 0) {
+ while (count--) {
int narg;
char * name;
db_expr_t offset;
@@ -459,9 +459,12 @@ db_i386_stack_trace(
goto next_frame;
} else {
frame_type = 0;
- narg = db_numargs(frame, task);
+ if (frame)
+ narg = db_numargs(frame, task);
+ else
+ narg = -1;
}
- } else if (INKERNEL(callpc) ^ INKERNEL(frame)) {
+ } else if (!frame || INKERNEL(callpc) ^ INKERNEL(frame)) {
frame_type = 0;
narg = -1;
} else {
@@ -477,6 +480,10 @@ db_i386_stack_trace(
} else
db_printf("%s(", name);
+ if (!frame) {
+ db_printf(")\n");
+ break;
+ }
argp = &frame->f_arg0;
while (narg > 0) {
db_printf("%x",
db_get_task_value((long)argp,sizeof(long),FALSE,task));
@@ -501,10 +508,6 @@ db_i386_stack_trace(
next_frame:
db_nextframe(&lastframe, &frame, &callpc, frame_type, th);
- if (frame == 0) {
- /* end of chain */
- break;
- }
if (!INKERNEL(lastframe) ||
(!INKERNEL(callpc) && !INKERNEL(frame)))
user_frame++;
@@ -513,7 +516,7 @@ db_i386_stack_trace(
if (kernel_only)
break;
}
- if (frame <= lastframe) {
+ if (frame && frame <= lastframe) {
if (INKERNEL(lastframe) && !INKERNEL(frame))
continue;
db_printf("Bad frame pointer: 0x%x\n", frame);
diff --git a/i386/i386/locore.S b/i386/i386/locore.S
index c715d95..ddba224 100644
--- a/i386/i386/locore.S
+++ b/i386/i386/locore.S
@@ -1180,6 +1180,8 @@ DATA(cpu_features)
.long 0
.text
+END(syscall)
+
/* Discover what kind of cpu we have; return the family number
(3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
ENTRY(discover_x86_cpu_type)
diff --git a/i386/i386/model_dep.h b/i386/i386/model_dep.h
index ab2738f..54aa1ec 100644
--- a/i386/i386/model_dep.h
+++ b/i386/i386/model_dep.h
@@ -50,11 +50,4 @@ extern void halt_cpu (void) __attribute__ ((noreturn));
*/
extern void halt_all_cpus (boolean_t reboot) __attribute__ ((noreturn));
-/*
- * More-specific code provides these;
- * they indicate the total extent of physical memory
- * that we know about and might ever have to manage.
- */
-extern vm_offset_t phys_first_addr, phys_last_addr;
-
#endif /* _I386AT_MODEL_DEP_H_ */
diff --git a/i386/i386/phys.c b/i386/i386/phys.c
index 8681fba..a5c3a15 100644
--- a/i386/i386/phys.c
+++ b/i386/i386/phys.c
@@ -47,12 +47,12 @@
* pmap_zero_page zeros the specified (machine independent) page.
*/
void
-pmap_zero_page(vm_offset_t p)
+pmap_zero_page(phys_addr_t p)
{
assert(p != vm_page_fictitious_addr);
vm_offset_t v;
pmap_mapwindow_t *map;
- boolean_t mapped = p >= phys_last_addr;
+ boolean_t mapped = p >= VM_PAGE_DIRECTMAP_LIMIT;
if (mapped)
{
@@ -73,14 +73,14 @@ pmap_zero_page(vm_offset_t p)
*/
void
pmap_copy_page(
- vm_offset_t src,
- vm_offset_t dst)
+ phys_addr_t src,
+ phys_addr_t dst)
{
vm_offset_t src_addr_v, dst_addr_v;
pmap_mapwindow_t *src_map = NULL;
pmap_mapwindow_t *dst_map;
- boolean_t src_mapped = src >= phys_last_addr;
- boolean_t dst_mapped = dst >= phys_last_addr;
+ boolean_t src_mapped = src >= VM_PAGE_DIRECTMAP_LIMIT;
+ boolean_t dst_mapped = dst >= VM_PAGE_DIRECTMAP_LIMIT;
assert(src != vm_page_fictitious_addr);
assert(dst != vm_page_fictitious_addr);
@@ -116,12 +116,12 @@ pmap_copy_page(
void
copy_to_phys(
vm_offset_t src_addr_v,
- vm_offset_t dst_addr_p,
+ phys_addr_t dst_addr_p,
int count)
{
vm_offset_t dst_addr_v;
pmap_mapwindow_t *dst_map;
- boolean_t mapped = dst_addr_p >= phys_last_addr;
+ boolean_t mapped = dst_addr_p >= VM_PAGE_DIRECTMAP_LIMIT;
assert(dst_addr_p != vm_page_fictitious_addr);
assert(pa_to_pte(dst_addr_p + count-1) == pa_to_pte(dst_addr_p));
@@ -147,13 +147,13 @@ copy_to_phys(
*/
void
copy_from_phys(
- vm_offset_t src_addr_p,
+ phys_addr_t src_addr_p,
vm_offset_t dst_addr_v,
int count)
{
vm_offset_t src_addr_v;
pmap_mapwindow_t *src_map;
- boolean_t mapped = src_addr_p >= phys_last_addr;
+ boolean_t mapped = src_addr_p >= VM_PAGE_DIRECTMAP_LIMIT;
assert(src_addr_p != vm_page_fictitious_addr);
assert(pa_to_pte(src_addr_p + count-1) == pa_to_pte(src_addr_p));
@@ -176,7 +176,7 @@ copy_from_phys(
*
* Convert a kernel virtual address to a physical address
*/
-vm_offset_t
+phys_addr_t
kvtophys(vm_offset_t addr)
{
pt_entry_t *pte;
diff --git a/i386/i386/strings.c b/i386/i386/strings.c
new file mode 100644
index 0000000..84a3bc1
--- /dev/null
+++ b/i386/i386/strings.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#define ARCH_STRING_MEMCPY
+#define ARCH_STRING_MEMMOVE
+#define ARCH_STRING_MEMSET
+#define ARCH_STRING_MEMCMP
+
+#ifdef ARCH_STRING_MEMCPY
+void *
+memcpy(void *dest, const void *src, size_t n)
+{
+ void *orig_dest;
+
+ orig_dest = dest;
+ asm volatile("rep movsb"
+ : "+D" (dest), "+S" (src), "+c" (n)
+ : : "memory");
+ return orig_dest;
+}
+#endif /* ARCH_STRING_MEMCPY */
+
+#ifdef ARCH_STRING_MEMMOVE
+void *
+memmove(void *dest, const void *src, size_t n)
+{
+ void *orig_dest;
+
+ orig_dest = dest;
+
+ if (dest <= src)
+ asm volatile("rep movsb"
+ : "+D" (dest), "+S" (src), "+c" (n)
+ : : "memory");
+ else {
+ dest += n - 1;
+ src += n - 1;
+ asm volatile("std; rep movsb; cld"
+ : "+D" (dest), "+S" (src), "+c" (n)
+ : : "memory");
+ }
+
+ return orig_dest;
+}
+#endif /* ARCH_STRING_MEMMOVE */
+
+#ifdef ARCH_STRING_MEMSET
+void *
+memset(void *s, int c, size_t n)
+{
+ void *orig_s;
+
+ orig_s = s;
+ asm volatile("rep stosb"
+ : "+D" (s), "+c" (n)
+ : "a" (c)
+ : "memory");
+ return orig_s;
+}
+#endif /* ARCH_STRING_MEMSET */
+
+#ifdef ARCH_STRING_MEMCMP
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+ unsigned char c1, c2;
+
+ if (n == 0)
+ return 0;
+
+ asm volatile("repe cmpsb"
+ : "+D" (s1), "+S" (s2), "+c" (n)
+ : : "memory");
+ c1 = *(((const unsigned char *)s1) - 1);
+ c2 = *(((const unsigned char *)s2) - 1);
+ return (int)c1 - (int)c2;
+}
+#endif /* ARCH_STRING_MEMCMP */
+
+#ifdef ARCH_STRING_STRLEN
+size_t
+strlen(const char *s)
+{
+ size_t n;
+
+ n = (size_t)-1;
+ asm volatile("repne scasb"
+ : "+D" (s), "+c" (n)
+ : "a" (0)
+ : "memory");
+ return ~n - 1;
+}
+#endif /* ARCH_STRING_STRLEN */
+
+#ifdef ARCH_STRING_STRCPY
+char *
+strcpy(char *dest, const char *src)
+{
+ char *orig_dest;
+
+ orig_dest = dest;
+ asm volatile("1:\n"
+ "lodsb\n"
+ "stosb\n"
+ "testb %%al, %%al\n"
+ "jnz 1b\n"
+ : "+D" (dest), "+S" (src)
+ : : "al", "memory");
+ return orig_dest;
+}
+#endif /* ARCH_STRING_STRCPY */
+
+#ifdef ARCH_STRING_STRCMP
+int
+strcmp(const char *s1, const char *s2)
+{
+ unsigned char c1, c2;
+
+ asm volatile("1:\n"
+ "lodsb\n"
+ "scasb\n"
+ "jne 1f\n"
+ "testb %%al, %%al\n"
+ "jnz 1b\n"
+ "1:\n"
+ : "+D" (s1), "+S" (s2)
+ : : "al", "memory");
+ c1 = *(((const unsigned char *)s1) - 1);
+ c2 = *(((const unsigned char *)s2) - 1);
+ return (int)c1 - (int)c2;
+}
+#endif /* ARCH_STRING_STRCMP */
diff --git a/i386/i386/trap.c b/i386/i386/trap.c
index 6470504..d4bdc7f 100644
--- a/i386/i386/trap.c
+++ b/i386/i386/trap.c
@@ -351,16 +351,6 @@ int user_trap(struct i386_saved_state *regs)
int type;
thread_t thread = current_thread();
- if ((vm_offset_t)thread < phys_last_addr) {
- printf("user_trap: bad thread pointer 0x%p\n", thread);
- printf("trap type %ld, code 0x%lx, va 0x%lx, eip 0x%lx\n",
- regs->trapno, regs->err, regs->cr2, regs->eip);
- asm volatile ("1: hlt; jmp 1b");
- }
-#if 0
-printf("user trap %d error %d sub %08x\n", type, code, subcode);
-#endif
-
type = regs->trapno;
code = 0;
subcode = 0;
diff --git a/i386/i386at/biosmem.c b/i386/i386at/biosmem.c
index a7a440e..a104020 100644
--- a/i386/i386at/biosmem.c
+++ b/i386/i386at/biosmem.c
@@ -18,7 +18,6 @@
#include <string.h>
#include <i386/model_dep.h>
#include <i386at/biosmem.h>
-#include <i386at/elf.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/macros.h>
@@ -29,6 +28,8 @@
#include <sys/types.h>
#include <vm/vm_page.h>
+#define DEBUG 0
+
#define __boot
#define __bootdata
#define __init
@@ -41,7 +42,26 @@
#define BOOT_CGACHARS (80 * 25)
#define BOOT_CGACOLOR 0x7
-extern char _start, _end;
+#define BIOSMEM_MAX_BOOT_DATA 64
+
+/*
+ * Boot data descriptor.
+ *
+ * The start and end addresses must not be page-aligned, since there
+ * could be more than one range inside a single page.
+ */
+struct biosmem_boot_data {
+ phys_addr_t start;
+ phys_addr_t end;
+ boolean_t temporary;
+};
+
+/*
+ * Sorted array of boot data descriptors.
+ */
+static struct biosmem_boot_data biosmem_boot_data_array[BIOSMEM_MAX_BOOT_DATA]
+ __bootdata;
+static unsigned int biosmem_nr_boot_data __bootdata;
/*
* Maximum number of entries in the BIOS memory map.
@@ -71,19 +91,6 @@ struct biosmem_map_entry {
};
/*
- * Contiguous block of physical memory.
- *
- * Tha "available" range records what has been passed to the VM system as
- * available inside the segment.
- */
-struct biosmem_segment {
- phys_addr_t start;
- phys_addr_t end;
- phys_addr_t avail_start;
- phys_addr_t avail_end;
-};
-
-/*
* Memory map built from the information passed by the boot loader.
*
* If the boot loader didn't pass a valid memory map, a simple map is built
@@ -94,6 +101,14 @@ static struct biosmem_map_entry biosmem_map[BIOSMEM_MAX_MAP_SIZE * 2]
static unsigned int biosmem_map_size __bootdata;
/*
+ * Contiguous block of physical memory.
+ */
+struct biosmem_segment {
+ phys_addr_t start;
+ phys_addr_t end;
+};
+
+/*
* Physical segment boundaries.
*/
static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata;
@@ -103,11 +118,24 @@ static struct biosmem_segment biosmem_segments[VM_PAGE_MAX_SEGS] __bootdata;
*
* This heap is located above BIOS memory.
*/
-static uint32_t biosmem_heap_start __bootdata;
-static uint32_t biosmem_heap_cur __bootdata;
-static uint32_t biosmem_heap_end __bootdata;
+static phys_addr_t biosmem_heap_start __bootdata;
+static phys_addr_t biosmem_heap_bottom __bootdata;
+static phys_addr_t biosmem_heap_top __bootdata;
+static phys_addr_t biosmem_heap_end __bootdata;
+
+/*
+ * Boot allocation policy.
+ *
+ * Top-down allocations are normally preferred to avoid unnecessarily
+ * filling the DMA segment.
+ */
+static boolean_t biosmem_heap_topdown __bootdata;
-static char biosmem_panic_toobig_msg[] __bootdata
+static char biosmem_panic_inval_boot_data[] __bootdata
+ = "biosmem: invalid boot data";
+static char biosmem_panic_too_many_boot_data[] __bootdata
+ = "biosmem: too many boot data ranges";
+static char biosmem_panic_too_big_msg[] __bootdata
= "biosmem: too many memory map entries";
#ifndef MACH_HYP
static char biosmem_panic_setup_msg[] __bootdata
@@ -120,6 +148,103 @@ static char biosmem_panic_inval_msg[] __bootdata
static char biosmem_panic_nomem_msg[] __bootdata
= "biosmem: unable to allocate memory";
+void __boot
+biosmem_register_boot_data(phys_addr_t start, phys_addr_t end,
+ boolean_t temporary)
+{
+ unsigned int i;
+
+ if (start >= end) {
+ boot_panic(biosmem_panic_inval_boot_data);
+ }
+
+ if (biosmem_nr_boot_data == ARRAY_SIZE(biosmem_boot_data_array)) {
+ boot_panic(biosmem_panic_too_many_boot_data);
+ }
+
+ for (i = 0; i < biosmem_nr_boot_data; i++) {
+ /* Check if the new range overlaps */
+ if ((end > biosmem_boot_data_array[i].start)
+ && (start < biosmem_boot_data_array[i].end)) {
+
+ /*
+ * If it does, check whether it's part of another range.
+ * For example, this applies to debugging symbols directly
+ * taken from the kernel image.
+ */
+ if ((start >= biosmem_boot_data_array[i].start)
+ && (end <= biosmem_boot_data_array[i].end)) {
+
+ /*
+ * If it's completely included, make sure that a permanent
+ * range remains permanent.
+ *
+ * XXX This means that if one big range is first registered
+ * as temporary, and a smaller range inside of it is
+ * registered as permanent, the bigger range becomes
+ * permanent. It's not easy nor useful in practice to do
+ * better than that.
+ */
+ if (biosmem_boot_data_array[i].temporary != temporary) {
+ biosmem_boot_data_array[i].temporary = FALSE;
+ }
+
+ return;
+ }
+
+ boot_panic(biosmem_panic_inval_boot_data);
+ }
+
+ if (end <= biosmem_boot_data_array[i].start) {
+ break;
+ }
+ }
+
+ boot_memmove(&biosmem_boot_data_array[i + 1],
+ &biosmem_boot_data_array[i],
+ (biosmem_nr_boot_data - i) * sizeof(*biosmem_boot_data_array));
+
+ biosmem_boot_data_array[i].start = start;
+ biosmem_boot_data_array[i].end = end;
+ biosmem_boot_data_array[i].temporary = temporary;
+ biosmem_nr_boot_data++;
+}
+
+static void __init
+biosmem_unregister_boot_data(phys_addr_t start, phys_addr_t end)
+{
+ unsigned int i;
+
+ if (start >= end) {
+ panic(biosmem_panic_inval_boot_data);
+ }
+
+ assert(biosmem_nr_boot_data != 0);
+
+ for (i = 0; i < biosmem_nr_boot_data; i++) {
+ if ((start == biosmem_boot_data_array[i].start)
+ && (end == biosmem_boot_data_array[i].end)) {
+ break;
+ }
+ }
+
+ if (i == biosmem_nr_boot_data) {
+ return;
+ }
+
+#if DEBUG
+ printf("biosmem: unregister boot data: %llx:%llx\n",
+ (unsigned long long)biosmem_boot_data_array[i].start,
+ (unsigned long long)biosmem_boot_data_array[i].end);
+#endif /* DEBUG */
+
+ biosmem_nr_boot_data--;
+
+ boot_memmove(&biosmem_boot_data_array[i],
+ &biosmem_boot_data_array[i + 1],
+ (biosmem_nr_boot_data - i) * sizeof(*biosmem_boot_data_array));
+}
+
#ifndef MACH_HYP
static void __boot
@@ -302,7 +427,7 @@ biosmem_map_adjust(void)
*/
if (biosmem_map_size >= ARRAY_SIZE(biosmem_map))
- boot_panic(biosmem_panic_toobig_msg);
+ boot_panic(biosmem_panic_too_big_msg);
biosmem_map[biosmem_map_size] = tmp;
biosmem_map_size++;
@@ -321,6 +446,16 @@ biosmem_map_adjust(void)
biosmem_map_sort();
}
+/*
+ * Find addresses of physical memory within a given range.
+ *
+ * This function considers the memory map with the [*phys_start, *phys_end]
+ * range on entry, and returns the lowest address of physical memory
+ * in *phys_start, and the highest address of unusable memory immediately
+ * following physical memory in *phys_end.
+ *
+ * These addresses are normally used to establish the range of a segment.
+ */
static int __boot
biosmem_map_find_avail(phys_addr_t *phys_start, phys_addr_t *phys_end)
{
@@ -382,161 +517,132 @@ biosmem_segment_size(unsigned int seg_index)
return biosmem_segments[seg_index].end - biosmem_segments[seg_index].start;
}
-#ifndef MACH_HYP
-
-static void __boot
-biosmem_save_cmdline_sizes(struct multiboot_raw_info *mbi)
+static int __boot
+biosmem_find_avail_clip(phys_addr_t *avail_start, phys_addr_t *avail_end,
+ phys_addr_t data_start, phys_addr_t data_end)
{
- struct multiboot_raw_module *mod;
- uint32_t i, va;
+ phys_addr_t orig_end;
- if (mbi->flags & MULTIBOOT_LOADER_CMDLINE) {
- va = phystokv(mbi->cmdline);
- mbi->unused0 = boot_strlen((char *)va) + 1;
- }
+ assert(data_start < data_end);
- if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
- unsigned long addr;
+ orig_end = data_end;
+ data_start = vm_page_trunc(data_start);
+ data_end = vm_page_round(data_end);
- addr = phystokv(mbi->mods_addr);
+ if (data_end < orig_end) {
+ boot_panic(biosmem_panic_inval_boot_data);
+ }
- for (i = 0; i < mbi->mods_count; i++) {
- mod = (struct multiboot_raw_module *)addr + i;
- va = phystokv(mod->string);
- mod->reserved = boot_strlen((char *)va) + 1;
- }
+ if ((data_end <= *avail_start) || (data_start >= *avail_end)) {
+ return 0;
}
-}
-static void __boot
-biosmem_find_boot_data_update(uint32_t min, uint32_t *start, uint32_t *end,
- uint32_t data_start, uint32_t data_end)
-{
- if ((min <= data_start) && (data_start < *start)) {
- *start = data_start;
- *end = data_end;
+ if (data_start > *avail_start) {
+ *avail_end = data_start;
+ } else {
+ if (data_end >= *avail_end) {
+ return -1;
+ }
+
+ *avail_start = data_end;
}
+
+ return 0;
}
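biosmem_find_avail_clip() only ever keeps one side of the available range; the
callers recover the rest by continuing the scan past the boot data. A standalone
model of that clipping step, with made-up addresses and already page-aligned
ranges (range_clip is a hypothetical name):

    #include <stdio.h>

    typedef unsigned long long pa_t;

    static int
    range_clip(pa_t *avail_start, pa_t *avail_end, pa_t data_start, pa_t data_end)
    {
        if ((data_end <= *avail_start) || (data_start >= *avail_end))
            return 0;                   /* no overlap, nothing to clip */

        if (data_start > *avail_start)
            *avail_end = data_start;    /* keep the part below the boot data */
        else if (data_end >= *avail_end)
            return -1;                  /* boot data covers the whole range */
        else
            *avail_start = data_end;    /* keep the part above the boot data */

        return 0;
    }

    int
    main(void)
    {
        pa_t start = 0x100000, end = 0x800000;

        /* A boot module at [0x200000, 0x300000) trims the range to the part
         * below it; a caller would then retry the scan from 0x300000. */
        range_clip(&start, &end, 0x200000, 0x300000);
        printf("%llx:%llx\n", start, end);      /* prints 100000:200000 */
        return 0;
    }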
/*
- * Find the first boot data in the given range, and return their containing
- * area (start address is returned directly, end address is returned in end).
- * The following are considered boot data :
- * - the kernel
- * - the kernel command line
- * - the module table
- * - the modules
- * - the modules command lines
- * - the ELF section header table
- * - the ELF .shstrtab, .symtab and .strtab sections
+ * Find available memory in the given range.
+ *
+ * The search starts at the given start address, up to the given end address.
+ * If a range is found, it is stored through the avail_startp and avail_endp
+ * pointers.
*
- * If no boot data was found, 0 is returned, and the end address isn't set.
+ * The range boundaries are page-aligned on return.
*/
-static uint32_t __boot
-biosmem_find_boot_data(const struct multiboot_raw_info *mbi, uint32_t min,
- uint32_t max, uint32_t *endp)
+static int __boot
+biosmem_find_avail(phys_addr_t start, phys_addr_t end,
+ phys_addr_t *avail_start, phys_addr_t *avail_end)
{
- struct multiboot_raw_module *mod;
- struct elf_shdr *shdr;
- uint32_t i, start, end = end;
- unsigned long tmp;
-
- start = max;
-
- biosmem_find_boot_data_update(min, &start, &end, _kvtophys(&_start),
- _kvtophys(&_end));
-
- if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0))
- biosmem_find_boot_data_update(min, &start, &end, mbi->cmdline,
- mbi->cmdline + mbi->unused0);
-
- if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
- i = mbi->mods_count * sizeof(struct multiboot_raw_module);
- biosmem_find_boot_data_update(min, &start, &end, mbi->mods_addr,
- mbi->mods_addr + i);
- tmp = phystokv(mbi->mods_addr);
-
- for (i = 0; i < mbi->mods_count; i++) {
- mod = (struct multiboot_raw_module *)tmp + i;
- biosmem_find_boot_data_update(min, &start, &end, mod->mod_start,
- mod->mod_end);
-
- if (mod->string != 0)
- biosmem_find_boot_data_update(min, &start, &end, mod->string,
- mod->string + mod->reserved);
- }
- }
+ phys_addr_t orig_start;
+ unsigned int i;
+ int error;
- if (mbi->flags & MULTIBOOT_LOADER_SHDR) {
- tmp = mbi->shdr_num * mbi->shdr_size;
- biosmem_find_boot_data_update(min, &start, &end, mbi->shdr_addr,
- mbi->shdr_addr + tmp);
- tmp = phystokv(mbi->shdr_addr);
+ assert(start <= end);
- for (i = 0; i < mbi->shdr_num; i++) {
- shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size));
+ orig_start = start;
+ start = vm_page_round(start);
+ end = vm_page_trunc(end);
- if ((shdr->type != ELF_SHT_SYMTAB)
- && (shdr->type != ELF_SHT_STRTAB))
- continue;
+ if ((start < orig_start) || (start >= end)) {
+ return -1;
+ }
+
+ *avail_start = start;
+ *avail_end = end;
+
+ for (i = 0; i < biosmem_nr_boot_data; i++) {
+ error = biosmem_find_avail_clip(avail_start, avail_end,
+ biosmem_boot_data_array[i].start,
+ biosmem_boot_data_array[i].end);
- biosmem_find_boot_data_update(min, &start, &end, shdr->addr,
- shdr->addr + shdr->size);
+ if (error) {
+ return -1;
}
}
- if (start == max)
- return 0;
-
- *endp = end;
- return start;
+ return 0;
}
+#ifndef MACH_HYP
+
static void __boot
-biosmem_setup_allocator(struct multiboot_raw_info *mbi)
+biosmem_setup_allocator(const struct multiboot_raw_info *mbi)
{
- uint32_t heap_start, heap_end, max_heap_start, max_heap_end;
- uint32_t mem_end, next;
+ phys_addr_t heap_start, heap_end, max_heap_start, max_heap_end;
+ phys_addr_t start, end;
+ int error;
/*
* Find some memory for the heap. Look for the largest unused area in
* upper memory, carefully avoiding all boot data.
*/
- mem_end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
+ end = vm_page_trunc((mbi->mem_upper + 1024) << 10);
#ifndef __LP64__
- if (mem_end > VM_PAGE_DIRECTMAP_LIMIT)
- mem_end = VM_PAGE_DIRECTMAP_LIMIT;
+ if (end > VM_PAGE_DIRECTMAP_LIMIT)
+ end = VM_PAGE_DIRECTMAP_LIMIT;
#endif /* __LP64__ */
max_heap_start = 0;
max_heap_end = 0;
- next = BIOSMEM_END;
+ start = BIOSMEM_END;
- do {
- heap_start = next;
- heap_end = biosmem_find_boot_data(mbi, heap_start, mem_end, &next);
+ for (;;) {
+ error = biosmem_find_avail(start, end, &heap_start, &heap_end);
- if (heap_end == 0) {
- heap_end = mem_end;
- next = 0;
+ if (error) {
+ break;
}
if ((heap_end - heap_start) > (max_heap_end - max_heap_start)) {
max_heap_start = heap_start;
max_heap_end = heap_end;
}
- } while (next != 0);
- max_heap_start = vm_page_round(max_heap_start);
- max_heap_end = vm_page_trunc(max_heap_end);
+ start = heap_end;
+ }
if (max_heap_start >= max_heap_end)
boot_panic(biosmem_panic_setup_msg);
biosmem_heap_start = max_heap_start;
biosmem_heap_end = max_heap_end;
- biosmem_heap_cur = biosmem_heap_end;
+ biosmem_heap_bottom = biosmem_heap_start;
+ biosmem_heap_top = biosmem_heap_end;
+ biosmem_heap_topdown = TRUE;
+
+ /* Prevent biosmem_free_usable() from releasing the heap */
+ biosmem_register_boot_data(biosmem_heap_start, biosmem_heap_end, FALSE);
}
#endif /* MACH_HYP */
@@ -544,7 +650,7 @@ biosmem_setup_allocator(struct multiboot_raw_info *mbi)
static void __boot
biosmem_bootstrap_common(void)
{
- phys_addr_t phys_start, phys_end, last_addr;
+ phys_addr_t phys_start, phys_end;
int error;
biosmem_map_adjust();
@@ -557,7 +663,6 @@ biosmem_bootstrap_common(void)
boot_panic(biosmem_panic_noseg_msg);
biosmem_set_segment(VM_PAGE_SEG_DMA, phys_start, phys_end);
- last_addr = phys_end;
phys_start = VM_PAGE_DMA_LIMIT;
#ifdef VM_PAGE_DMA32_LIMIT
@@ -565,10 +670,9 @@ biosmem_bootstrap_common(void)
error = biosmem_map_find_avail(&phys_start, &phys_end);
if (error)
- goto out;
+ return;
biosmem_set_segment(VM_PAGE_SEG_DMA32, phys_start, phys_end);
- last_addr = phys_end;
phys_start = VM_PAGE_DMA32_LIMIT;
#endif /* VM_PAGE_DMA32_LIMIT */
@@ -576,23 +680,18 @@ biosmem_bootstrap_common(void)
error = biosmem_map_find_avail(&phys_start, &phys_end);
if (error)
- goto out;
+ return;
biosmem_set_segment(VM_PAGE_SEG_DIRECTMAP, phys_start, phys_end);
- last_addr = phys_end;
phys_start = VM_PAGE_DIRECTMAP_LIMIT;
phys_end = VM_PAGE_HIGHMEM_LIMIT;
error = biosmem_map_find_avail(&phys_start, &phys_end);
if (error)
- goto out;
+ return;
biosmem_set_segment(VM_PAGE_SEG_HIGHMEM, phys_start, phys_end);
-
-out:
- /* XXX phys_last_addr must be part of the direct physical mapping */
- phys_last_addr = last_addr;
}
#ifdef MACH_HYP
@@ -616,26 +715,34 @@ biosmem_xen_bootstrap(void)
biosmem_heap_end = boot_info.nr_pages << PAGE_SHIFT;
#ifndef __LP64__
- /* TODO Check that this actually makes sense */
if (biosmem_heap_end > VM_PAGE_DIRECTMAP_LIMIT)
biosmem_heap_end = VM_PAGE_DIRECTMAP_LIMIT;
#endif /* __LP64__ */
+ biosmem_heap_bottom = biosmem_heap_start;
+ biosmem_heap_top = biosmem_heap_end;
+
/*
- * XXX Allocation on Xen must be bottom-up :
+ * XXX Allocations on Xen are initially bottom-up:
* At the "start of day", only 512k are available after the boot
* data. The pmap module then creates a 4g mapping so all physical
* memory is available, but it uses this allocator to do so.
* Therefore, it must return pages from this small 512k regions
* first.
*/
- biosmem_heap_cur = biosmem_heap_start;
+ biosmem_heap_topdown = FALSE;
+
+ /*
+ * Prevent biosmem_free_usable() from releasing the Xen boot information
+ * and the heap.
+ */
+ biosmem_register_boot_data(0, biosmem_heap_end, FALSE);
}
#else /* MACH_HYP */
void __boot
-biosmem_bootstrap(struct multiboot_raw_info *mbi)
+biosmem_bootstrap(const struct multiboot_raw_info *mbi)
{
if (mbi->flags & MULTIBOOT_LOADER_MMAP)
biosmem_map_build(mbi);
@@ -643,12 +750,6 @@ biosmem_bootstrap(struct multiboot_raw_info *mbi)
biosmem_map_build_simple(mbi);
biosmem_bootstrap_common();
-
- /*
- * The kernel and modules command lines will be memory mapped later
- * during initialization. Their respective sizes must be saved.
- */
- biosmem_save_cmdline_sizes(mbi);
biosmem_setup_allocator(mbi);
}
@@ -659,34 +760,37 @@ biosmem_bootalloc(unsigned int nr_pages)
{
unsigned long addr, size;
- assert(!vm_page_ready());
-
size = vm_page_ptoa(nr_pages);
if (size == 0)
boot_panic(biosmem_panic_inval_msg);
-#ifdef MACH_HYP
- addr = biosmem_heap_cur;
-#else /* MACH_HYP */
- /* Top-down allocation to avoid unnecessarily filling DMA segments */
- addr = biosmem_heap_cur - size;
-#endif /* MACH_HYP */
+ if (biosmem_heap_topdown) {
+ addr = biosmem_heap_top - size;
- if ((addr < biosmem_heap_start) || (addr > biosmem_heap_cur))
- boot_panic(biosmem_panic_nomem_msg);
+ if ((addr < biosmem_heap_start) || (addr > biosmem_heap_top)) {
+ boot_panic(biosmem_panic_nomem_msg);
+ }
-#ifdef MACH_HYP
- biosmem_heap_cur += size;
-#else /* MACH_HYP */
- biosmem_heap_cur = addr;
-#endif /* MACH_HYP */
+ biosmem_heap_top = addr;
+ } else {
+ unsigned long end;
+
+ addr = biosmem_heap_bottom;
+ end = addr + size;
+
+ if ((end > biosmem_heap_end) || (end < biosmem_heap_bottom)) {
+ boot_panic(biosmem_panic_nomem_msg);
+ }
+
+ biosmem_heap_bottom = end;
+ }
return addr;
}
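The two branches above implement the policy documented earlier: the normal path
hands out the highest heap addresses first, so the low, DMA-capable pages are
consumed last, while the Xen path grows bottom-up. A standalone model with
made-up constants (bootalloc_model is a hypothetical name):

    #include <assert.h>

    #define HEAP_START 0x01000000UL
    #define HEAP_END   0x02000000UL

    static unsigned long heap_top = HEAP_END;
    static unsigned long heap_bottom = HEAP_START;

    static unsigned long
    bootalloc_model(unsigned long size, int topdown)
    {
        unsigned long addr;

        if (topdown) {
            addr = heap_top - size;
            assert(addr >= HEAP_START && addr <= heap_top);
            heap_top = addr;            /* cursor moves down */
        } else {
            addr = heap_bottom;
            assert(addr + size <= HEAP_END && addr + size >= addr);
            heap_bottom = addr + size;  /* cursor moves up */
        }

        return addr;
    }

    int
    main(void)
    {
        assert(bootalloc_model(0x1000, 1) == HEAP_END - 0x1000);
        assert(bootalloc_model(0x1000, 0) == HEAP_START);
        return 0;
    }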
phys_addr_t __boot
-biosmem_directmap_size(void)
+biosmem_directmap_end(void)
{
if (biosmem_segment_size(VM_PAGE_SEG_DIRECTMAP) != 0)
return biosmem_segment_end(VM_PAGE_SEG_DIRECTMAP);
@@ -696,6 +800,8 @@ biosmem_directmap_size(void)
return biosmem_segment_end(VM_PAGE_SEG_DMA);
}
+#if DEBUG
+
static const char * __init
biosmem_type_desc(unsigned int type)
{
@@ -729,16 +835,23 @@ biosmem_map_show(void)
entry->base_addr + entry->length,
biosmem_type_desc(entry->type));
- printf("biosmem: heap: %x-%x\n", biosmem_heap_start, biosmem_heap_end);
+ printf("biosmem: heap: %llx:%llx\n",
+ (unsigned long long)biosmem_heap_start,
+ (unsigned long long)biosmem_heap_end);
}
+#else /* DEBUG */
+#define biosmem_map_show()
+#endif /* DEBUG */
+
static void __init
-biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end,
- phys_addr_t phys_start, phys_addr_t phys_end,
- phys_addr_t avail_start, phys_addr_t avail_end)
+biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end)
{
+ phys_addr_t phys_start, phys_end, avail_start, avail_end;
unsigned int seg_index;
+ phys_start = seg->start;
+ phys_end = seg->end;
seg_index = seg - biosmem_segments;
if (phys_end > max_phys_end) {
@@ -753,15 +866,28 @@ biosmem_load_segment(struct biosmem_segment *seg, uint64_t max_phys_end,
phys_end = max_phys_end;
}
- if ((avail_start < phys_start) || (avail_start >= phys_end))
- avail_start = phys_start;
+ vm_page_load(seg_index, phys_start, phys_end);
- if ((avail_end <= phys_start) || (avail_end > phys_end))
- avail_end = phys_end;
+ /*
+ * Clip the remaining available heap to fit it into the loaded
+ * segment if possible.
+ */
- seg->avail_start = avail_start;
- seg->avail_end = avail_end;
- vm_page_load(seg_index, phys_start, phys_end, avail_start, avail_end);
+ if ((biosmem_heap_top > phys_start) && (biosmem_heap_bottom < phys_end)) {
+ if (biosmem_heap_bottom >= phys_start) {
+ avail_start = biosmem_heap_bottom;
+ } else {
+ avail_start = phys_start;
+ }
+
+ if (biosmem_heap_top <= phys_end) {
+ avail_end = biosmem_heap_top;
+ } else {
+ avail_end = phys_end;
+ }
+
+ vm_page_load_heap(seg_index, avail_start, avail_end);
+ }
}
void __init
@@ -777,8 +903,25 @@ biosmem_setup(void)
break;
seg = &biosmem_segments[i];
- biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT, seg->start, seg->end,
- biosmem_heap_start, biosmem_heap_cur);
+ biosmem_load_segment(seg, VM_PAGE_HIGHMEM_LIMIT);
+ }
+}
+
+static void __init
+biosmem_unregister_temporary_boot_data(void)
+{
+ struct biosmem_boot_data *data;
+ unsigned int i;
+
+ for (i = 0; i < biosmem_nr_boot_data; i++) {
+ data = &biosmem_boot_data_array[i];
+
+ if (!data->temporary) {
+ continue;
+ }
+
+ biosmem_unregister_boot_data(data->start, data->end);
+ i = (unsigned int)-1;
}
}
@@ -787,9 +930,11 @@ biosmem_free_usable_range(phys_addr_t start, phys_addr_t end)
{
struct vm_page *page;
- printf("biosmem: release to vm_page: %llx-%llx (%lluk)\n",
+#if DEBUG
+ printf("biosmem: release to vm_page: %llx:%llx (%lluk)\n",
(unsigned long long)start, (unsigned long long)end,
(unsigned long long)((end - start) >> 10));
+#endif
while (start < end) {
page = vm_page_lookup_pa(start);
@@ -800,85 +945,20 @@ biosmem_free_usable_range(phys_addr_t start, phys_addr_t end)
}
static void __init
-biosmem_free_usable_update_start(phys_addr_t *start, phys_addr_t res_start,
- phys_addr_t res_end)
-{
- if ((*start >= res_start) && (*start < res_end))
- *start = res_end;
-}
-
-static phys_addr_t __init
-biosmem_free_usable_start(phys_addr_t start)
-{
- const struct biosmem_segment *seg;
- unsigned int i;
-
- biosmem_free_usable_update_start(&start, _kvtophys(&_start),
- _kvtophys(&_end));
- biosmem_free_usable_update_start(&start, biosmem_heap_start,
- biosmem_heap_end);
-
- for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
- seg = &biosmem_segments[i];
- biosmem_free_usable_update_start(&start, seg->avail_start,
- seg->avail_end);
- }
-
- return start;
-}
-
-static int __init
-biosmem_free_usable_reserved(phys_addr_t addr)
-{
- const struct biosmem_segment *seg;
- unsigned int i;
-
- if ((addr >= _kvtophys(&_start))
- && (addr < _kvtophys(&_end)))
- return 1;
-
- if ((addr >= biosmem_heap_start) && (addr < biosmem_heap_end))
- return 1;
-
- for (i = 0; i < ARRAY_SIZE(biosmem_segments); i++) {
- seg = &biosmem_segments[i];
-
- if ((addr >= seg->avail_start) && (addr < seg->avail_end))
- return 1;
- }
-
- return 0;
-}
-
-static phys_addr_t __init
-biosmem_free_usable_end(phys_addr_t start, phys_addr_t entry_end)
-{
- while (start < entry_end) {
- if (biosmem_free_usable_reserved(start))
- break;
-
- start += PAGE_SIZE;
- }
-
- return start;
-}
-
-static void __init
biosmem_free_usable_entry(phys_addr_t start, phys_addr_t end)
{
- phys_addr_t entry_end;
-
- entry_end = end;
+ phys_addr_t avail_start, avail_end;
+ int error;
for (;;) {
- start = biosmem_free_usable_start(start);
+ error = biosmem_find_avail(start, end, &avail_start, &avail_end);
- if (start >= entry_end)
- return;
+ if (error) {
+ break;
+ }
- end = biosmem_free_usable_end(start, entry_end);
- biosmem_free_usable_range(start, end);
- start = end;
+ biosmem_free_usable_range(avail_start, avail_end);
+ start = avail_end;
}
}
@@ -889,6 +969,8 @@ biosmem_free_usable(void)
uint64_t start, end;
unsigned int i;
+ biosmem_unregister_temporary_boot_data();
+
for (i = 0; i < biosmem_map_size; i++) {
entry = &biosmem_map[i];
@@ -902,9 +984,17 @@ biosmem_free_usable(void)
end = vm_page_trunc(entry->base_addr + entry->length);
+ if (end > VM_PAGE_HIGHMEM_LIMIT) {
+ end = VM_PAGE_HIGHMEM_LIMIT;
+ }
+
if (start < BIOSMEM_BASE)
start = BIOSMEM_BASE;
+ if (start >= end) {
+ continue;
+ }
+
biosmem_free_usable_entry(start, end);
}
}
diff --git a/i386/i386at/biosmem.h b/i386/i386at/biosmem.h
index 1db63f9..7824c16 100644
--- a/i386/i386at/biosmem.h
+++ b/i386/i386at/biosmem.h
@@ -40,36 +40,51 @@
#define BIOSMEM_END 0x100000
/*
- * Early initialization of the biosmem module.
+ * Report reserved addresses to the biosmem module.
*
- * This function processes the given multiboot data for BIOS-provided
- * memory information, and sets up a bootstrap physical page allocator.
+ * Once all boot data have been registered, the user can set up the
+ * early page allocator.
*
- * It is called before paging is enabled.
+ * If the range is marked temporary, it will be unregistered when
+ * biosmem_free_usable() is called, so that pages that used to store
+ * these boot data may be released to the VM system.
+ */
+void biosmem_register_boot_data(phys_addr_t start, phys_addr_t end,
+ boolean_t temporary);
+
+/*
+ * Initialize the early page allocator.
+ *
+ * This function uses the memory map provided by the boot loader along
+ * with the registered boot data addresses to set up a heap of free pages
+ * of physical memory.
+ *
+ * Note that on Xen, this function registers all the Xen boot information
+ * as boot data itself.
*/
#ifdef MACH_HYP
void biosmem_xen_bootstrap(void);
#else /* MACH_HYP */
-void biosmem_bootstrap(struct multiboot_raw_info *mbi);
+void biosmem_bootstrap(const struct multiboot_raw_info *mbi);
#endif /* MACH_HYP */
/*
* Allocate contiguous physical pages during bootstrap.
*
- * This function is called before paging is enabled. It should only be used
- * to allocate initial page table pages. Those pages are later loaded into
- * the VM system (as reserved pages) which means they can be freed like other
- * regular pages. Users should fix up the type of those pages once the VM
- * system is initialized.
+ * The pages returned are guaranteed to be part of the direct physical
+ * mapping when paging is enabled.
+ *
+ * This function should only be used to allocate initial page table pages.
+ * Those pages are later loaded into the VM system (as reserved pages)
+ * which means they can be freed like other regular pages. Users should
+ * fix up the type of those pages once the VM system is initialized.
*/
unsigned long biosmem_bootalloc(unsigned int nr_pages);
/*
- * Return the amount of physical memory that can be directly mapped.
- *
- * This includes the size of both the DMA/DMA32 and DIRECTMAP segments.
+ * Return the limit of physical memory that can be directly mapped.
*/
-phys_addr_t biosmem_directmap_size(void);
+phys_addr_t biosmem_directmap_end(void);
/*
* Set up physical memory based on the information obtained during bootstrap
@@ -80,8 +95,8 @@ void biosmem_setup(void);
/*
* Free all usable memory.
*
- * This includes ranges that weren't part of the bootstrap allocator initial
- * heap, e.g. because they contained boot data.
+ * This function releases all pages that aren't used by boot data and have
+ * not already been loaded into the VM system.
*/
void biosmem_free_usable(void);
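Taken together, the comments above imply a fixed ordering between these entry
points. The sketch below pieces that ordering together for the non-Xen path from
the i386at/model_dep.c changes later in this diff; the wrapper function is
hypothetical and skips error handling:

    /* Hypothetical summary of the boot-time call order (non-Xen). */
    static void
    biosmem_boot_sketch(const struct multiboot_raw_info *mbi)
    {
        extern char _start[], _end[];

        /* 1. Declare every range holding boot data: kernel image, command
         *    lines, modules, ELF section headers, ... */
        biosmem_register_boot_data(_kvtophys(&_start), _kvtophys(&_end), FALSE);

        /* 2. Build the memory map and set up the early page allocator. */
        biosmem_bootstrap(mbi);

        /* 3. Early allocations (initial page tables) come from the boot heap. */
        (void) biosmem_bootalloc(1);

        /* 4. Load the physical segments into the VM system. */
        biosmem_setup();

        /* 5. Once the VM system runs, release the pages that only held
         *    temporary boot data. */
        biosmem_free_usable();
    }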
diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S
index 567851e..9339cb9 100644
--- a/i386/i386at/boothdr.S
+++ b/i386/i386at/boothdr.S
@@ -17,9 +17,9 @@ _start:
/* MultiBoot header - see multiboot.h. */
#define MULTIBOOT_MAGIC 0x1BADB002
#ifdef __ELF__
-#define MULTIBOOT_FLAGS 0x00000002
+#define MULTIBOOT_FLAGS 0x00000003
#else /* __ELF__ */
-#define MULTIBOOT_FLAGS 0x00010002
+#define MULTIBOOT_FLAGS 0x00010003
#endif /* __ELF__ */
P2ALIGN(2)
boot_hdr:
diff --git a/i386/i386at/interrupt.S b/i386/i386at/interrupt.S
index e238ea4..cdb385c 100644
--- a/i386/i386at/interrupt.S
+++ b/i386/i386at/interrupt.S
@@ -49,3 +49,4 @@ ENTRY(interrupt)
outb %al,$(PIC_SLAVE_ICW)
1:
ret /* return */
+END(interrupt)
diff --git a/i386/i386at/mem.c b/i386/i386at/mem.c
index f239afa..eac2549 100644
--- a/i386/i386at/mem.c
+++ b/i386/i386at/mem.c
@@ -36,12 +36,24 @@ dev_t dev;
vm_offset_t off;
vm_prot_t prot;
{
+ struct vm_page *p;
+
if (off == 0)
return 0;
- else if (off < 0xa0000)
- return -1;
- else if (off >= 0x100000 && off < phys_last_addr)
+
+ /*
+ * The legacy device mappings are included in the page tables and
+ * need their own test.
+ */
+ if (off >= 0xa0000 && off < 0x100000)
+ goto out;
+
+ p = vm_page_lookup_pa(off);
+
+ if (p != NULL) {
return -1;
- else
- return i386_btop(off);
+ }
+
+out:
+ return i386_btop(off);
}
diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c
index 679d524..239f63f 100644
--- a/i386/i386at/model_dep.c
+++ b/i386/i386at/model_dep.c
@@ -66,6 +66,7 @@
#include <i386/model_dep.h>
#include <i386at/autoconf.h>
#include <i386at/biosmem.h>
+#include <i386at/elf.h>
#include <i386at/idt.h>
#include <i386at/int_init.h>
#include <i386at/kd.h>
@@ -105,11 +106,6 @@ static unsigned elf_shdr_shndx;
#define RESERVED_BIOS 0x10000
-/* These indicate the total extent of physical memory addresses we're using.
- They are page-aligned. */
-vm_offset_t phys_first_addr = 0;
-vm_offset_t phys_last_addr;
-
/* A copy of the multiboot info structure passed by the boot loader. */
#ifdef MACH_XEN
struct start_info boot_info;
@@ -153,6 +149,14 @@ void machine_init(void)
cninit();
/*
+ * Make more free memory.
+ *
+ * This is particularly important for the Linux drivers which
+ * require available DMA memory.
+ */
+ biosmem_free_usable();
+
+ /*
* Set up to use floating point.
*/
init_fpu();
@@ -264,6 +268,67 @@ void db_reset_cpu(void)
halt_all_cpus(1);
}
+#ifndef MACH_HYP
+
+static void
+register_boot_data(const struct multiboot_raw_info *mbi)
+{
+ struct multiboot_raw_module *mod;
+ struct elf_shdr *shdr;
+ unsigned long tmp;
+ unsigned int i;
+
+ extern char _start[], _end[];
+
+ /* XXX For now, register all boot data as permanent */
+
+ biosmem_register_boot_data(_kvtophys(&_start), _kvtophys(&_end), FALSE);
+
+ if ((mbi->flags & MULTIBOOT_LOADER_CMDLINE) && (mbi->cmdline != 0)) {
+ biosmem_register_boot_data(mbi->cmdline,
+ mbi->cmdline
+ + strlen((void *)phystokv(mbi->cmdline)) + 1, FALSE);
+ }
+
+ if (mbi->flags & MULTIBOOT_LOADER_MODULES) {
+ i = mbi->mods_count * sizeof(struct multiboot_raw_module);
+ biosmem_register_boot_data(mbi->mods_addr, mbi->mods_addr + i, FALSE);
+
+ tmp = phystokv(mbi->mods_addr);
+
+ for (i = 0; i < mbi->mods_count; i++) {
+ mod = (struct multiboot_raw_module *)tmp + i;
+ biosmem_register_boot_data(mod->mod_start, mod->mod_end, FALSE);
+
+ if (mod->string != 0) {
+ biosmem_register_boot_data(mod->string,
+ mod->string
+ + strlen((void *)phystokv(mod->string)) + 1,
+ FALSE);
+ }
+ }
+ }
+
+ if (mbi->flags & MULTIBOOT_LOADER_SHDR) {
+ tmp = mbi->shdr_num * mbi->shdr_size;
+ biosmem_register_boot_data(mbi->shdr_addr, mbi->shdr_addr + tmp, FALSE);
+
+ tmp = phystokv(mbi->shdr_addr);
+
+ for (i = 0; i < mbi->shdr_num; i++) {
+ shdr = (struct elf_shdr *)(tmp + (i * mbi->shdr_size));
+
+ if ((shdr->type != ELF_SHT_SYMTAB)
+ && (shdr->type != ELF_SHT_STRTAB))
+ continue;
+
+ biosmem_register_boot_data(shdr->addr, shdr->addr + shdr->size, FALSE);
+ }
+ }
+}
+
+#endif /* MACH_HYP */
+
/*
* Basic PC VM initialization.
* Turns on paging and changes the kernel segments to use high linear addresses.
@@ -291,6 +356,7 @@ i386at_init(void)
#ifdef MACH_HYP
biosmem_xen_bootstrap();
#else /* MACH_HYP */
+ register_boot_data((struct multiboot_raw_info *) &boot_info);
biosmem_bootstrap((struct multiboot_raw_info *) &boot_info);
#endif /* MACH_HYP */
@@ -619,11 +685,6 @@ resettodr(void)
writetodc();
}
-unsigned int pmap_free_pages(void)
-{
- return vm_page_atop(phys_last_addr); /* XXX */
-}
-
boolean_t
init_alloc_aligned(vm_size_t size, vm_offset_t *addrp)
{
@@ -646,15 +707,3 @@ pmap_grab_page(void)
panic("Not enough memory to initialize Mach");
return addr;
}
-
-boolean_t pmap_valid_page(vm_offset_t x)
-{
- /* XXX is this OK? What does it matter for? */
- return (((phys_first_addr <= x) && (x < phys_last_addr))
-#ifndef MACH_HYP
- && !(
- ((boot_info.mem_lower * 1024) <= x) &&
- (x < 1024*1024))
-#endif /* MACH_HYP */
- );
-}
diff --git a/i386/include/mach/i386/asm.h b/i386/include/mach/i386/asm.h
index 4e3b589..45b848d 100644
--- a/i386/include/mach/i386/asm.h
+++ b/i386/include/mach/i386/asm.h
@@ -96,24 +96,24 @@
#ifdef GPROF
#define MCOUNT .data; gLB(9) .long 0; .text; lea LBb(x, 9),%edx; call mcount
-#define ENTRY(x) .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x) ; \
+#define ENTRY(x) .globl EXT(x); .type EXT(x), @function; .p2align TEXT_ALIGN; LEXT(x) ; \
pushl %ebp; movl %esp, %ebp; MCOUNT; popl %ebp;
-#define ENTRY2(x,y) .globl EXT(x); .globl EXT(y); \
+#define ENTRY2(x,y) .globl EXT(x); .type EXT(x), @function; .globl EXT(y); .type EXT(y), @function; \
.p2align TEXT_ALIGN; LEXT(x) LEXT(y)
-#define ASENTRY(x) .globl x; .p2align TEXT_ALIGN; gLB(x) ; \
+#define ASENTRY(x) .globl x; .type x, @function; .p2align TEXT_ALIGN; gLB(x) ; \
pushl %ebp; movl %esp, %ebp; MCOUNT; popl %ebp;
#define END(x) .size x,.-x
#else /* GPROF */
#define MCOUNT
-#define ENTRY(x) .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x)
-#define ENTRY2(x,y) .globl EXT(x); .globl EXT(y); \
+#define ENTRY(x) .globl EXT(x); .type EXT(x), @function; .p2align TEXT_ALIGN; LEXT(x)
+#define ENTRY2(x,y) .globl EXT(x); .type EXT(x), @function; .globl EXT(y); .type EXT(y), @function; \
.p2align TEXT_ALIGN; LEXT(x) LEXT(y)
-#define ASENTRY(x) .globl x; .p2align TEXT_ALIGN; gLB(x)
+#define ASENTRY(x) .globl x; .type x, @function; .p2align TEXT_ALIGN; gLB(x)
#define END(x) .size x,.-x
#endif /* GPROF */
-#define Entry(x) .globl EXT(x); .p2align TEXT_ALIGN; LEXT(x)
+#define Entry(x) .globl EXT(x); .type EXT(x), @function; .p2align TEXT_ALIGN; LEXT(x)
#define DATA(x) .globl EXT(x); .p2align DATA_ALIGN; LEXT(x)
#endif /* _MACH_I386_ASM_H_ */
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index e362b45..096e6fd 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -83,6 +83,7 @@
#include <i386/proc_reg.h>
#include <i386/locore.h>
#include <i386/model_dep.h>
+#include <i386at/biosmem.h>
#include <i386at/model_dep.h>
#ifdef MACH_PSEUDO_PHYS
@@ -158,9 +159,9 @@ vm_offset_t kernel_virtual_end;
/*
* Index into pv_head table, its lock bits, and the modify/reference
- * bits starting at phys_first_addr.
+ * bits.
*/
-#define pa_index(pa) (atop(pa - phys_first_addr))
+#define pa_index(pa) vm_page_table_index(pa)
#define pai_to_pvh(pai) (&pv_head_table[pai])
#define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table))
@@ -326,12 +327,7 @@ lock_data_t pmap_system_lock;
#endif /* NCPUS > 1 */
-#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
-
#ifdef MACH_PV_PAGETABLES
-#if 1
-#define INVALIDATE_TLB(pmap, s, e) hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL)
-#else
#define INVALIDATE_TLB(pmap, s, e) do { \
if (__builtin_constant_p((e) - (s)) \
&& (e) - (s) == PAGE_SIZE) \
@@ -339,26 +335,16 @@ lock_data_t pmap_system_lock;
else \
hyp_mmuext_op_void(MMUEXT_TLB_FLUSH_LOCAL); \
} while(0)
-#endif
#else /* MACH_PV_PAGETABLES */
-#if 0
/* It is hard to know when a TLB flush becomes less expensive than a bunch of
* invlpgs. But it surely is more expensive than just one invlpg. */
-#define INVALIDATE_TLB(pmap, s, e) { \
+#define INVALIDATE_TLB(pmap, s, e) do { \
if (__builtin_constant_p((e) - (s)) \
&& (e) - (s) == PAGE_SIZE) \
- invlpg_linear(s); \
+ invlpg_linear((pmap) == kernel_pmap ? kvtolin(s) : (s)); \
else \
flush_tlb(); \
-}
-#else
-#define INVALIDATE_TLB(pmap, s, e) { \
- (void) (pmap); \
- (void) (s); \
- (void) (e); \
- flush_tlb(); \
-}
-#endif
+} while (0)
#endif /* MACH_PV_PAGETABLES */
@@ -497,8 +483,8 @@ void ptep_check(ptep_t ptep)
*/
vm_offset_t pmap_map(
vm_offset_t virt,
- vm_offset_t start,
- vm_offset_t end,
+ phys_addr_t start,
+ phys_addr_t end,
int prot)
{
int ps;
@@ -514,14 +500,14 @@ vm_offset_t pmap_map(
/*
* Back-door routine for mapping kernel VM at initialization.
- * Useful for mapping memory outside the range
- * [phys_first_addr, phys_last_addr) (i.e., devices).
+ * Useful for mapping memory outside the range of direct mapped
+ * physical memory (i.e., devices).
* Otherwise like pmap_map.
*/
vm_offset_t pmap_map_bd(
vm_offset_t virt,
- vm_offset_t start,
- vm_offset_t end,
+ phys_addr_t start,
+ phys_addr_t end,
vm_prot_t prot)
{
pt_entry_t template;
@@ -615,8 +601,8 @@ void pmap_bootstrap(void)
* mapped into the kernel address space,
* and extends to a stupid arbitrary limit beyond that.
*/
- kernel_virtual_start = phystokv(phys_last_addr);
- kernel_virtual_end = phystokv(phys_last_addr) + VM_KERNEL_MAP_SIZE;
+ kernel_virtual_start = phystokv(biosmem_directmap_end());
+ kernel_virtual_end = kernel_virtual_start + VM_KERNEL_MAP_SIZE;
if (kernel_virtual_end < kernel_virtual_start
|| kernel_virtual_end > VM_MAX_KERNEL_ADDRESS)
@@ -707,8 +693,7 @@ void pmap_bootstrap(void)
pt_entry_t global = CPU_HAS_FEATURE(CPU_FEATURE_PGE) ?
INTEL_PTE_GLOBAL : 0;
/*
- * Map virtual memory for all known physical memory, 1-1,
- * from phys_first_addr to phys_last_addr.
+ * Map virtual memory for all directly mappable physical memory, 1-1,
* Make any mappings completely in the kernel's text segment read-only.
*
* Also allocate some additional all-null page tables afterwards
@@ -717,7 +702,7 @@ void pmap_bootstrap(void)
* to allocate new kernel page tables later.
* XX fix this
*/
- for (va = phystokv(phys_first_addr); va >= phystokv(phys_first_addr) && va < kernel_virtual_end; )
+ for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; )
{
pt_entry_t *pde = kernel_page_dir +
lin2pdenum(kvtolin(va));
pt_entry_t *ptable =
(pt_entry_t*)phystokv(pmap_grab_page());
@@ -728,7 +713,7 @@ void pmap_bootstrap(void)
| INTEL_PTE_VALID | INTEL_PTE_WRITE);
/* Initialize the page table. */
- for (pte = ptable; (va < phystokv(phys_last_addr)) && (pte < ptable+NPTES); pte++)
+ for (pte = ptable; (va < phystokv(biosmem_directmap_end())) && (pte < ptable+NPTES); pte++)
{
if ((pte - ptable) < ptenum(va))
{
@@ -906,13 +891,20 @@ pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
{
pmap_mapwindow_t *map;
+ assert(entry != 0);
+
/* Find an empty one. */
for (map = &mapwindows[0]; map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]; map++)
if (!(*map->entry))
break;
assert(map < &mapwindows[sizeof (mapwindows) / sizeof (*mapwindows)]);
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(map->entry), pa_to_ma(entry)))
+ panic("pmap_get_mapwindow");
+#else /* MACH_PV_PAGETABLES */
WRITE_PTE(map->entry, entry);
+#endif /* MACH_PV_PAGETABLES */
return map;
}
@@ -921,7 +913,12 @@ pmap_mapwindow_t *pmap_get_mapwindow(pt_entry_t entry)
*/
void pmap_put_mapwindow(pmap_mapwindow_t *map)
{
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmu_update_pte(kv_to_ma(map->entry), 0))
+ panic("pmap_put_mapwindow");
+#else /* MACH_PV_PAGETABLES */
WRITE_PTE(map->entry, 0);
+#endif /* MACH_PV_PAGETABLES */
PMAP_UPDATE_TLBS(kernel_pmap, map->vaddr, map->vaddr + PAGE_SIZE);
}
@@ -940,7 +937,7 @@ void pmap_virtual_space(
*/
void pmap_init(void)
{
- long npages;
+ unsigned long npages;
vm_offset_t addr;
vm_size_t s;
#if NCPUS > 1
@@ -952,7 +949,7 @@ void pmap_init(void)
* the modify bit array, and the pte_page table.
*/
- npages = atop(phys_last_addr - phys_first_addr);
+ npages = vm_page_table_size();
s = (vm_size_t) (sizeof(struct pv_entry) * npages
+ pv_lock_table_size(npages)
+ npages);
@@ -1000,31 +997,16 @@ void pmap_init(void)
pmap_initialized = TRUE;
}
-#define valid_page(x) (pmap_initialized && pmap_valid_page(x))
-
-boolean_t pmap_verify_free(vm_offset_t phys)
+static inline boolean_t
+valid_page(phys_addr_t addr)
{
- pv_entry_t pv_h;
- int pai;
- int spl;
- boolean_t result;
+ struct vm_page *p;
- assert(phys != vm_page_fictitious_addr);
if (!pmap_initialized)
- return(TRUE);
-
- if (!pmap_valid_page(phys))
- return(FALSE);
-
- PMAP_WRITE_LOCK(spl);
-
- pai = pa_index(phys);
- pv_h = pai_to_pvh(pai);
-
- result = (pv_h->pmap == PMAP_NULL);
- PMAP_WRITE_UNLOCK(spl);
+ return FALSE;
- return(result);
+ p = vm_page_lookup_pa(addr);
+ return (p != NULL);
}
/*
@@ -1049,12 +1031,12 @@ pmap_page_table_page_alloc(void)
* Allocate it now if it is missing.
*/
if (pmap_object == VM_OBJECT_NULL)
- pmap_object = vm_object_allocate(phys_last_addr - phys_first_addr);
+ pmap_object = vm_object_allocate(vm_page_table_size() * PAGE_SIZE);
/*
* Allocate a VM page for the level 2 page table entries.
*/
- while ((m = vm_page_grab(FALSE)) == VM_PAGE_NULL)
+ while ((m = vm_page_grab()) == VM_PAGE_NULL)
VM_PAGE_WAIT((void (*)()) 0);
/*
@@ -1232,7 +1214,7 @@ pmap_t pmap_create(vm_size_t size)
void pmap_destroy(pmap_t p)
{
pt_entry_t *pdep;
- vm_offset_t pa;
+ phys_addr_t pa;
int c, s;
vm_page_t m;
@@ -1327,9 +1309,9 @@ void pmap_remove_range(
pt_entry_t *epte)
{
pt_entry_t *cpte;
- int num_removed, num_unwired;
- int pai;
- vm_offset_t pa;
+ unsigned long num_removed, num_unwired;
+ unsigned long pai;
+ phys_addr_t pa;
#ifdef MACH_PV_PAGETABLES
int n, ii = 0;
struct mmu_update update[HYP_BATCH_MMU_UPDATES];
@@ -1519,13 +1501,13 @@ void pmap_remove(
* page.
*/
void pmap_page_protect(
- vm_offset_t phys,
+ phys_addr_t phys,
vm_prot_t prot)
{
pv_entry_t pv_h, prev;
pv_entry_t pv_e;
pt_entry_t *pte;
- int pai;
+ unsigned long pai;
pmap_t pmap;
int spl;
boolean_t remove;
@@ -1791,17 +1773,18 @@ void pmap_protect(
void pmap_enter(
pmap_t pmap,
vm_offset_t v,
- vm_offset_t pa,
+ phys_addr_t pa,
vm_prot_t prot,
boolean_t wired)
{
+ boolean_t is_physmem;
pt_entry_t *pte;
pv_entry_t pv_h;
- int i, pai;
+ unsigned long i, pai;
pv_entry_t pv_e;
pt_entry_t template;
int spl;
- vm_offset_t old_pa;
+ phys_addr_t old_pa;
assert(pa != vm_page_fictitious_addr);
if (pmap_debug) printf("pmap(%lx, %lx)\n", v, pa);
@@ -1926,6 +1909,11 @@ Retry:
continue;
}
+ if (vm_page_ready())
+ is_physmem = (vm_page_lookup_pa(pa) != NULL);
+ else
+ is_physmem = (pa < biosmem_directmap_end());
+
/*
* Special case if the physical page is already mapped
* at this address.
@@ -1947,7 +1935,7 @@ Retry:
if (prot & VM_PROT_WRITE)
template |= INTEL_PTE_WRITE;
if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
- && pa >= phys_last_addr)
+ && !is_physmem)
template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
if (wired)
template |= INTEL_PTE_WIRED;
@@ -2059,7 +2047,7 @@ Retry:
if (prot & VM_PROT_WRITE)
template |= INTEL_PTE_WRITE;
if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486
- && pa >= phys_last_addr)
+ && !is_physmem)
template |= INTEL_PTE_NCACHE|INTEL_PTE_WTHRU;
if (wired)
template |= INTEL_PTE_WIRED;
@@ -2145,20 +2133,20 @@ void pmap_change_wiring(
* with the given map/virtual_address pair.
*/
-vm_offset_t pmap_extract(
+phys_addr_t pmap_extract(
pmap_t pmap,
vm_offset_t va)
{
pt_entry_t *pte;
- vm_offset_t pa;
+ phys_addr_t pa;
int spl;
SPLVM(spl);
simple_lock(&pmap->lock);
if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
- pa = (vm_offset_t) 0;
+ pa = 0;
else if (!(*pte & INTEL_PTE_VALID))
- pa = (vm_offset_t) 0;
+ pa = 0;
else
pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
simple_unlock(&pmap->lock);
@@ -2199,7 +2187,7 @@ void pmap_collect(pmap_t p)
{
pt_entry_t *pdp, *ptp;
pt_entry_t *eptp;
- vm_offset_t pa;
+ phys_addr_t pa;
int spl, wired;
if (p == PMAP_NULL)
@@ -2415,13 +2403,13 @@ pmap_pageable(
*/
void
phys_attribute_clear(
- vm_offset_t phys,
+ phys_addr_t phys,
int bits)
{
pv_entry_t pv_h;
pv_entry_t pv_e;
pt_entry_t *pte;
- int pai;
+ unsigned long pai;
pmap_t pmap;
int spl;
@@ -2499,13 +2487,13 @@ phys_attribute_clear(
*/
boolean_t
phys_attribute_test(
- vm_offset_t phys,
+ phys_addr_t phys,
int bits)
{
pv_entry_t pv_h;
pv_entry_t pv_e;
pt_entry_t *pte;
- int pai;
+ unsigned long pai;
pmap_t pmap;
int spl;
@@ -2587,7 +2575,7 @@ phys_attribute_test(
* Clear the modify bits on the specified physical page.
*/
-void pmap_clear_modify(vm_offset_t phys)
+void pmap_clear_modify(phys_addr_t phys)
{
phys_attribute_clear(phys, PHYS_MODIFIED);
}
@@ -2599,7 +2587,7 @@ void pmap_clear_modify(vm_offset_t phys)
* by any physical maps.
*/
-boolean_t pmap_is_modified(vm_offset_t phys)
+boolean_t pmap_is_modified(phys_addr_t phys)
{
return (phys_attribute_test(phys, PHYS_MODIFIED));
}
@@ -2610,7 +2598,7 @@ boolean_t pmap_is_modified(vm_offset_t phys)
* Clear the reference bit on the specified physical page.
*/
-void pmap_clear_reference(vm_offset_t phys)
+void pmap_clear_reference(phys_addr_t phys)
{
phys_attribute_clear(phys, PHYS_REFERENCED);
}
@@ -2622,7 +2610,7 @@ void pmap_clear_reference(vm_offset_t phys)
* by any physical maps.
*/
-boolean_t pmap_is_referenced(vm_offset_t phys)
+boolean_t pmap_is_referenced(phys_addr_t phys)
{
return (phys_attribute_test(phys, PHYS_REFERENCED));
}
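With phys_last_addr gone, pmap_enter() above now asks the VM layer whether a
frame is managed RAM before deciding on cacheability. A minimal sketch of that
test, assuming the kernel context of this file (the helper name is hypothetical;
in the patch the logic is inline in pmap_enter):

    /* TRUE when a mapping at pa should be uncached/write-through, i.e.
     * when pa is not managed RAM (typically a device aperture). */
    static boolean_t
    pte_wants_nocache(phys_addr_t pa)
    {
        boolean_t is_physmem;

        if (vm_page_ready())
            /* Normal case: RAM is whatever the vm_page tables track. */
            is_physmem = (vm_page_lookup_pa(pa) != NULL);
        else
            /* Before the VM page tables exist: use the directmap limit. */
            is_physmem = (pa < biosmem_directmap_end());

        return !is_physmem;
    }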
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 382cd5f..e6a3ede 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -64,11 +64,7 @@
* i386/i486 Page Table Entry
*/
-#if PAE
-typedef unsigned long long pt_entry_t;
-#else /* PAE */
-typedef unsigned int pt_entry_t;
-#endif /* PAE */
+typedef phys_addr_t pt_entry_t;
#define PT_ENTRY_NULL ((pt_entry_t *) 0)
#endif /* __ASSEMBLER__ */
@@ -447,19 +443,19 @@ extern void pmap_unmap_page_zero (void);
/*
* pmap_zero_page zeros the specified (machine independent) page.
*/
-extern void pmap_zero_page (vm_offset_t);
+extern void pmap_zero_page (phys_addr_t);
/*
* pmap_copy_page copies the specified (machine independent) pages.
*/
-extern void pmap_copy_page (vm_offset_t, vm_offset_t);
+extern void pmap_copy_page (phys_addr_t, phys_addr_t);
/*
* kvtophys(addr)
*
* Convert a kernel virtual address to a physical address
*/
-extern vm_offset_t kvtophys (vm_offset_t);
+extern phys_addr_t kvtophys (vm_offset_t);
void pmap_remove_range(
pmap_t pmap,
diff --git a/i386/xen/xen.c b/i386/xen/xen.c
index 8b015c4..d10ecf3 100644
--- a/i386/xen/xen.c
+++ b/i386/xen/xen.c
@@ -58,7 +58,7 @@ void hypclock_machine_intr(int old_ipl, void *ret_addr, struct i386_interrupt_st
}
void hyp_p2m_init(void) {
- unsigned long nb_pfns = atop(phys_last_addr);
+ unsigned long nb_pfns = vm_page_table_size();
#ifdef MACH_PSEUDO_PHYS
#define P2M_PAGE_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
unsigned long *l3 = (unsigned long *)phystokv(pmap_grab_page()), *l2 = NULL;
diff --git a/ipc/ipc_init.c b/ipc/ipc_init.c
index 5ed800f..8e628ad 100644
--- a/ipc/ipc_init.c
+++ b/ipc/ipc_init.c
@@ -111,7 +111,7 @@ ipc_init(void)
vm_offset_t min, max;
kmem_submap(ipc_kernel_map, kernel_map, &min, &max,
- ipc_kernel_map_size, TRUE);
+ ipc_kernel_map_size);
ipc_host_init();
}
diff --git a/ipc/mach_debug.c b/ipc/mach_debug.c
index efb07a4..6ddc89b 100644
--- a/ipc/mach_debug.c
+++ b/ipc/mach_debug.c
@@ -257,6 +257,9 @@ mach_port_kernel_object(
ipc_port_t port;
kern_return_t kr;
+ if (space == IS_NULL)
+ return KERN_INVALID_TASK;
+
kr = ipc_right_lookup_read(space, name, &entry);
if (kr != KERN_SUCCESS)
return kr;
diff --git a/kern/lock.c b/kern/lock.c
index 1daf1b4..a4b8252 100644
--- a/kern/lock.c
+++ b/kern/lock.c
@@ -175,6 +175,7 @@ void _simple_lock(
l->lock_data = 1;
info = &simple_locks_info[simple_locks_taken++];
+ barrier();
info->l = l;
info->expr = expression;
info->loc = location;
@@ -193,6 +194,7 @@ boolean_t _simple_lock_try(
l->lock_data = 1;
info = &simple_locks_info[simple_locks_taken++];
+ barrier();
info->l = l;
info->expr = expression;
info->loc = location;
@@ -219,6 +221,7 @@ void simple_unlock(
simple_locks_info[i] = simple_locks_info[simple_locks_taken-1];
}
+ barrier();
simple_locks_taken--;
simple_locks_info[simple_locks_taken] = (struct simple_locks_info) {0};
}
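The barrier() calls inserted above are compiler barriers: they keep the updates
to simple_locks_taken from being reordered with the surrounding writes to
simple_locks_info[], so the debug records stay consistent with the counter. The
macro is defined elsewhere in the tree; the conventional GCC form, given here as
an assumption only, is:

    #define barrier()   __asm__ __volatile__("" : : : "memory")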
diff --git a/kern/slab.c b/kern/slab.c
index 1f8e000..d4ef847 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -370,7 +370,7 @@ kmem_pagealloc_physmem(vm_size_t size)
assert(size == PAGE_SIZE);
for (;;) {
- page = vm_page_grab_contig(size, VM_PAGE_SEL_DIRECTMAP);
+ page = vm_page_grab();
if (page != NULL)
break;
@@ -389,7 +389,7 @@ kmem_pagefree_physmem(vm_offset_t addr, vm_size_t size)
assert(size == PAGE_SIZE);
page = vm_page_lookup_pa(kvtophys(addr));
assert(page != NULL);
- vm_page_free_contig(page, size);
+ vm_page_release(page, FALSE, FALSE);
}
static vm_offset_t
diff --git a/kern/startup.c b/kern/startup.c
index c87cbb1..19bd7bf 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -78,7 +78,6 @@ boolean_t reboot_on_panic = TRUE;
#endif /* NCPUS > 1 */
/* XX */
-extern vm_offset_t phys_first_addr, phys_last_addr;
extern char *kernel_cmdline;
/*
diff --git a/kern/strings.c b/kern/strings.c
index 64410d9..71c9905 100644
--- a/kern/strings.c
+++ b/kern/strings.c
@@ -179,6 +179,7 @@ strlen(
* The return value is a pointer to the "s" string.
*/
+#if 0
void *
memset(
void *_s, int c, size_t n)
@@ -191,3 +192,105 @@ memset(
return _s;
}
+#endif
+
+/*
+ * Abstract:
+ * strchr returns a pointer to the first occurrence of the character
+ * "c" in the string "s". If "c" is not found, return NULL.
+ */
+char *
+strchr(
+ const char *s,
+ int c)
+{
+ while (*s != c) {
+ if (*s == '\0') {
+ return NULL;
+ }
+
+ s++;
+ }
+
+ return (char *)s;
+}
+
+/*
+ * Abstract:
+ * strsep extracts tokens from strings. If "*sp" is NULL, return NULL
+ * and do nothing. Otherwise, find the first token in string "*sp".
+ * Tokens are delimited by characters in the string "delim". If no
+ * delimiter is found, the token is the entire string "*sp", and "*sp"
+ * is made NULL. Otherwise, overwrite the delimiter with a null byte,
+ * and make "*sp" point past it.
+ */
+char *
+strsep(
+ char **sp,
+ const char *delim)
+{
+ const char *d;
+ char *s, *t;
+
+ s = t = *sp;
+
+ if (s == NULL) {
+ return NULL;
+ }
+
+ for (;;) {
+ if (*s == '\0') {
+ *sp = NULL;
+ return t;
+ }
+
+ d = delim;
+
+ for (;;) {
+ if (*d == '\0') {
+ break;
+ }
+
+ if (*d == *s) {
+ *s = '\0';
+ *sp = s + 1;
+ return t;
+ }
+
+ d++;
+ }
+
+ s++;
+ }
+}
+
+/*
+ * Abstract:
+ * strstr returns a pointer to the first occurrence of the substring
+ * "find" in the string "s". If no substring was found, return NULL.
+ */
+char *
+strstr(
+ const char *s,
+ const char *find)
+{
+ size_t len;
+
+ len = strlen(find);
+
+ if (len == 0) {
+ return (char *)s;
+ }
+
+ for (;;) {
+ if (*s == '\0') {
+ return NULL;
+ }
+
+ if (strncmp(s, find, len) == 0) {
+ return (char *)s;
+ }
+
+ s++;
+ }
+}
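Since strsep() mutates the string and advances the caller's pointer, a short
usage sketch may help. The program below exercises the behaviour described in
the comment; it is compiled against the hosted C library (which provides a
compatible strsep on glibc and the BSDs) purely so it can run in isolation:

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        char buf[] = "root=hd0s1 console=com0";
        char *sp = buf;
        char *tok;

        /* Prints "root=hd0s1" then "console=com0"; sp ends up NULL. */
        while ((tok = strsep(&sp, " ")) != NULL)
            printf("token: %s\n", tok);

        return 0;
    }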
diff --git a/kern/task.c b/kern/task.c
index 673a437..7dff124 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -104,7 +104,7 @@ kern_return_t task_create(
} else {
new_task->map = vm_map_create(pmap_create(0),
round_page(VM_MIN_ADDRESS),
- trunc_page(VM_MAX_ADDRESS), TRUE);
+ trunc_page(VM_MAX_ADDRESS));
vm_map_set_name(new_task->map, new_task->name);
}
diff --git a/kern/thread.c b/kern/thread.c
index 7db1f3d..0ac7c53 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -342,7 +342,7 @@ void thread_init(void)
/* thread_template.sched_stamp (later) */
thread_template.recover = (vm_offset_t) 0;
- thread_template.vm_privilege = FALSE;
+ thread_template.vm_privilege = 0;
thread_template.user_stop_count = 1;
@@ -2233,11 +2233,11 @@ thread_wire(
thread_lock(thread);
if (wired) {
- thread->vm_privilege = TRUE;
+ thread->vm_privilege = 1;
stack_privilege(thread);
}
else {
- thread->vm_privilege = FALSE;
+ thread->vm_privilege = 0;
/*XXX stack_unprivilege(thread); */
thread->stack_privilege = 0;
}
diff --git a/kern/thread.h b/kern/thread.h
index 7106fd2..f0ed71a 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -77,7 +77,6 @@ struct thread {
struct {
unsigned state:16;
unsigned wake_active:1;
- unsigned vm_privilege:1;
unsigned active:1;
};
event_t event_key;
@@ -146,8 +145,8 @@ struct thread {
/* VM global variables */
vm_offset_t recover; /* page fault recovery (copyin/out) */
- /* Defined above */
- /* boolean_t vm_privilege; Can use reserved memory? */
+ unsigned int vm_privilege; /* Can use reserved memory?
+ Implemented as a counter */
/* User-visible scheduling state */
int user_stop_count; /* outstanding stops */
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c
index 74126eb..c1d922b 100644
--- a/linux/dev/glue/block.c
+++ b/linux/dev/glue/block.c
@@ -50,6 +50,7 @@
#include <mach/notify.h>
#include <kern/kalloc.h>
+#include <kern/list.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
@@ -97,7 +98,7 @@ struct temp_data
struct inode inode;
struct file file;
struct request req;
- queue_head_t pages;
+ struct list pages;
};
/* One of these exists for each
@@ -302,11 +303,11 @@ alloc_buffer (int size)
if (! linux_auto_config)
{
- while ((m = vm_page_grab (FALSE)) == 0)
+ while ((m = vm_page_grab ()) == 0)
VM_PAGE_WAIT (0);
d = current_thread ()->pcb->data;
assert (d);
- queue_enter (&d->pages, m, vm_page_t, pageq);
+ list_insert_tail (&d->pages, &m->node);
return (void *) phystokv(m->phys_addr);
}
return (void *) __get_free_pages (GFP_KERNEL, 0, ~0UL);
@@ -317,7 +318,7 @@ static void
free_buffer (void *p, int size)
{
struct temp_data *d;
- vm_page_t m;
+ vm_page_t m, tmp;
assert (size <= PAGE_SIZE);
@@ -325,11 +326,11 @@ free_buffer (void *p, int size)
{
d = current_thread ()->pcb->data;
assert (d);
- queue_iterate (&d->pages, m, vm_page_t, pageq)
+ list_for_each_entry_safe (&d->pages, m, tmp, node)
{
if (phystokv(m->phys_addr) == (vm_offset_t) p)
{
- queue_remove (&d->pages, m, vm_page_t, pageq);
+ list_remove (&m->node);
VM_PAGE_FREE (m);
return;
}
@@ -992,7 +993,7 @@ check:
#define DECL_DATA struct temp_data td
#define INIT_DATA() \
{ \
- queue_init (&td.pages); \
+ list_init (&td.pages); \
td.inode.i_rdev = bd->dev; \
td.file.f_mode = bd->mode; \
td.file.f_flags = bd->flags; \
@@ -1046,7 +1047,7 @@ device_open (ipc_port_t reply_port, mach_msg_type_name_t reply_port_type,
minor <<= gd->minor_shift;
dev = MKDEV (major, minor);
- queue_init (&td.pages);
+ list_init (&td.pages);
current_thread ()->pcb->data = &td;
/* Check partition. */
@@ -1417,7 +1418,7 @@ device_read (void *d, ipc_port_t reply_port,
boolean_t dirty;
int resid, amt;
io_return_t err = 0;
- queue_head_t pages;
+ struct list pages;
vm_map_copy_t copy;
vm_offset_t addr, offset, alloc_offset, o;
vm_object_t object;
@@ -1460,7 +1461,7 @@ device_read (void *d, ipc_port_t reply_port,
if (err)
goto out;
- queue_init (&pages);
+ list_init (&pages);
while (resid)
{
@@ -1471,7 +1472,7 @@ device_read (void *d, ipc_port_t reply_port,
/* Map any pages left from previous operation. */
o = trunc_page (offset);
- queue_iterate (&pages, m, vm_page_t, pageq)
+ list_for_each_entry (&pages, m, node)
{
pmap_enter (vm_map_pmap (device_io_map),
addr + o - trunc_page (offset),
@@ -1483,11 +1484,11 @@ device_read (void *d, ipc_port_t reply_port,
/* Allocate and map pages. */
while (alloc_offset < trunc_page (offset) + len)
{
- while ((m = vm_page_grab (FALSE)) == 0)
+ while ((m = vm_page_grab ()) == 0)
VM_PAGE_WAIT (0);
assert (! m->active && ! m->inactive);
m->busy = TRUE;
- queue_enter (&pages, m, vm_page_t, pageq);
+ list_insert_tail (&pages, &m->node);
pmap_enter (vm_map_pmap (device_io_map),
addr + alloc_offset - trunc_page (offset),
m->phys_addr, VM_PROT_READ|VM_PROT_WRITE, TRUE);
@@ -1529,9 +1530,9 @@ device_read (void *d, ipc_port_t reply_port,
vm_object_lock (object);
while (o < trunc_page (offset))
{
- m = (vm_page_t) queue_first (&pages);
- assert (! queue_end (&pages, (queue_entry_t) m));
- queue_remove (&pages, m, vm_page_t, pageq);
+ m = list_first_entry (&pages, struct vm_page, node);
+ assert (! list_end (&pages, &m->node));
+ list_remove (&m->node);
assert (m->busy);
vm_page_lock_queues ();
if (dirty)
@@ -1557,7 +1558,7 @@ device_read (void *d, ipc_port_t reply_port,
/* Delete kernel buffer. */
vm_map_remove (device_io_map, addr, addr + size);
- assert (queue_empty (&pages));
+ assert (list_empty (&pages));
out:
if (! err)
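The switch from queue_iterate to list_for_each_entry_safe above matters because
the loop removes (and frees) the element it is visiting; the safe variant keeps
a pointer to the next node so the traversal survives the removal. A minimal
sketch of the pattern, assuming the <kern/list.h> interface used in this commit
(the helper name is hypothetical):

    /* Free every page queued on a temporary list. */
    static void
    free_queued_pages(struct list *pages)
    {
        struct vm_page *m, *tmp;

        list_for_each_entry_safe(pages, m, tmp, node) {
            list_remove(&m->node);
            VM_PAGE_FREE(m);
        }
    }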
diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c
index 1573273..6b9cadd 100644
--- a/linux/dev/glue/net.c
+++ b/linux/dev/glue/net.c
@@ -428,62 +428,43 @@ device_write (void *d, ipc_port_t reply_port,
int *bytes_written)
{
unsigned char *p;
- int i, amt, skblen, s;
+ int i, s;
vm_map_copy_t copy = (vm_map_copy_t) data;
+ char *map_data;
+ vm_offset_t map_addr;
+ vm_size_t map_size;
struct net_data *nd = d;
struct linux_device *dev = nd->dev;
struct sk_buff *skb;
+ kern_return_t kr;
if (count == 0 || count > dev->mtu + dev->hard_header_len)
return D_INVALID_SIZE;
/* Allocate a sk_buff. */
- amt = PAGE_SIZE - (copy->offset & PAGE_MASK);
- skblen = (amt >= count) ? 0 : count;
- skb = dev_alloc_skb (skblen);
+ skb = dev_alloc_skb (count);
if (!skb)
return D_NO_MEMORY;
- /* Copy user data. This is only required if it spans multiple pages. */
- if (skblen == 0)
- {
- assert (copy->cpy_npages == 1);
-
- skb->copy = copy;
- skb->data = ((void *) phystokv(copy->cpy_page_list[0]->phys_addr)
- + (copy->offset & PAGE_MASK));
- skb->len = count;
- skb->head = skb->data;
- skb->tail = skb->data + skb->len;
- skb->end = skb->tail;
- }
- else
- {
- skb->len = skblen;
- skb->tail = skb->data + skblen;
- skb->end = skb->tail;
-
- memcpy (skb->data,
- ((void *) phystokv(copy->cpy_page_list[0]->phys_addr)
- + (copy->offset & PAGE_MASK)),
- amt);
- count -= amt;
- p = skb->data + amt;
- for (i = 1; count > 0 && i < copy->cpy_npages; i++)
- {
- amt = PAGE_SIZE;
- if (amt > count)
- amt = count;
- memcpy (p, (void *) phystokv(copy->cpy_page_list[i]->phys_addr), amt);
- count -= amt;
- p += amt;
- }
+ /* Map user data. */
+ kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+ &map_addr, &map_size, copy, count);
- assert (count == 0);
+ if (kr) {
+ dev_kfree_skb (skb, FREE_WRITE);
+ return D_NO_MEMORY;
+ }
- vm_map_copy_discard (copy);
- }
+ /* XXX The underlying physical pages of the mapping could be highmem,
+ for which drivers require the use of a bounce buffer. */
+ memcpy (skb->data, map_data, count);
+ kmem_io_map_deallocate (device_io_map, map_addr, map_size);
+ vm_map_copy_discard (copy);
+ skb->len = count;
+ skb->head = skb->data;
+ skb->tail = skb->data + skb->len;
+ skb->end = skb->tail;
skb->dev = dev;
skb->reply = reply_port;
skb->reply_type = reply_port_type;
diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c
index d69b3fc..3740c12 100644
--- a/linux/dev/init/main.c
+++ b/linux/dev/init/main.c
@@ -104,7 +104,7 @@ linux_init (void)
/*
* Initialize memory size.
*/
- high_memory = phys_last_addr;
+ high_memory = vm_page_seg_end(VM_PAGE_SEL_DIRECTMAP);
init_IRQ ();
linux_sched_init ();
diff --git a/version.m4 b/version.m4
index 3a7512a..330aa15 100644
--- a/version.m4
+++ b/version.m4
@@ -1,4 +1,4 @@
m4_define([AC_PACKAGE_NAME],[GNU Mach])
-m4_define([AC_PACKAGE_VERSION],[1.7+git20160809])
+m4_define([AC_PACKAGE_VERSION],[1.7+git20160921])
m4_define([AC_PACKAGE_BUGREPORT],address@hidden)
m4_define([AC_PACKAGE_TARNAME],[gnumach])
diff --git a/vm/pmap.h b/vm/pmap.h
index 9bbcdc3..2201b44 100644
--- a/vm/pmap.h
+++ b/vm/pmap.h
@@ -65,8 +65,6 @@
/* During VM initialization, steal a chunk of memory. */
extern vm_offset_t pmap_steal_memory(vm_size_t);
-/* During VM initialization, report remaining unused physical pages. */
-extern unsigned int pmap_free_pages(void);
/* Initialization, after kernel runs in virtual memory. */
extern void pmap_init(void);
@@ -75,14 +73,10 @@ extern void pmap_init(void);
* If machine/pmap.h defines MACHINE_PAGES, it must implement
* the above functions. The pmap module has complete control.
* Otherwise, it must implement
- * pmap_free_pages
* pmap_virtual_space
* pmap_init
* and vm/vm_resident.c implements pmap_steal_memory using
- * pmap_free_pages, pmap_virtual_space, and pmap_enter.
- *
- * pmap_free_pages may over-estimate the number of unused physical pages.
- * However, for best performance pmap_free_pages should be accurate.
+ * pmap_virtual_space and pmap_enter.
*/
/* During VM initialization, report virtual space available for the kernel. */
@@ -106,7 +100,7 @@ extern void pmap_reference(pmap_t pmap);
extern void pmap_destroy(pmap_t pmap);
/* Enter a mapping */
-extern void pmap_enter(pmap_t pmap, vm_offset_t va, vm_offset_t pa,
+extern void pmap_enter(pmap_t pmap, vm_offset_t va, phys_addr_t pa,
vm_prot_t prot, boolean_t wired);
@@ -134,7 +128,7 @@ extern void pmap_deactivate(pmap_t, thread_t, int);
*/
/* Restrict access to page. */
-void pmap_page_protect(vm_offset_t pa, vm_prot_t prot);
+void pmap_page_protect(phys_addr_t pa, vm_prot_t prot);
/*
* Routines to manage reference/modify bits based on
@@ -143,24 +137,24 @@ void pmap_page_protect(vm_offset_t pa, vm_prot_t prot);
*/
/* Clear reference bit */
-void pmap_clear_reference(vm_offset_t pa);
+void pmap_clear_reference(phys_addr_t pa);
/* Return reference bit */
#ifndef pmap_is_referenced
-boolean_t pmap_is_referenced(vm_offset_t pa);
+boolean_t pmap_is_referenced(phys_addr_t pa);
#endif /* pmap_is_referenced */
/* Clear modify bit */
-void pmap_clear_modify(vm_offset_t pa);
+void pmap_clear_modify(phys_addr_t pa);
/* Return modify bit */
-boolean_t pmap_is_modified(vm_offset_t pa);
+boolean_t pmap_is_modified(phys_addr_t pa);
/*
* Sundry required routines
*/
/* Return a virtual-to-physical mapping, if possible. */
-extern vm_offset_t pmap_extract(pmap_t, vm_offset_t);
+extern phys_addr_t pmap_extract(pmap_t, vm_offset_t);
/* Perform garbage collection, if any. */
extern void pmap_collect(pmap_t);
/* Specify pageability. */
@@ -186,8 +180,6 @@ extern kern_return_t pmap_attribute(void);
*/
extern vm_offset_t pmap_grab_page (void);
-extern boolean_t pmap_valid_page(vm_offset_t x);
-
/*
* Make the specified pages (by pmap, offset)
* pageable (or not) as requested.
@@ -200,14 +192,14 @@ extern void pmap_pageable(
/*
* Back-door routine for mapping kernel VM at initialization.
- * Useful for mapping memory outside the range
- * [phys_first_addr, phys_last_addr) (i.e., devices).
+ * Useful for mapping memory outside the range of direct mapped
+ * physical memory (i.e., devices).
* Otherwise like pmap_map.
*/
extern vm_offset_t pmap_map_bd(
vm_offset_t virt,
- vm_offset_t start,
- vm_offset_t end,
+ phys_addr_t start,
+ phys_addr_t end,
vm_prot_t prot);
/*
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index 68afc59..99381ef 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -423,7 +423,7 @@ vm_fault_return_t vm_fault_page(
* need to allocate a real page.
*/
- real_m = vm_page_grab(!object->internal);
+ real_m = vm_page_grab();
if (real_m == VM_PAGE_NULL) {
vm_fault_cleanup(object,
first_m);
return(VM_FAULT_MEMORY_SHORTAGE);
@@ -607,7 +607,7 @@ vm_fault_return_t vm_fault_page(
* won't block for pages.
*/
- if (m->fictitious && !vm_page_convert(&m, FALSE)) {
+ if (m->fictitious && !vm_page_convert(&m)) {
VM_PAGE_FREE(m);
vm_fault_cleanup(object, first_m);
return(VM_FAULT_MEMORY_SHORTAGE);
@@ -725,7 +725,7 @@ vm_fault_return_t vm_fault_page(
assert(m->object == object);
first_m = VM_PAGE_NULL;
- if (m->fictitious && !vm_page_convert(&m, !object->internal)) {
+ if (m->fictitious && !vm_page_convert(&m)) {
VM_PAGE_FREE(m);
vm_fault_cleanup(object, VM_PAGE_NULL);
return(VM_FAULT_MEMORY_SHORTAGE);
@@ -810,7 +810,7 @@ vm_fault_return_t vm_fault_page(
/*
* Allocate a page for the copy
*/
- copy_m = vm_page_grab(!first_object->internal);
+ copy_m = vm_page_grab();
if (copy_m == VM_PAGE_NULL) {
RELEASE_PAGE(m);
vm_fault_cleanup(object, first_m);
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index 9c0a20b..81bb153 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -778,8 +778,7 @@ kmem_submap(
vm_map_t parent,
vm_offset_t *min,
vm_offset_t *max,
- vm_size_t size,
- boolean_t pageable)
+ vm_size_t size)
{
vm_offset_t addr;
kern_return_t kr;
@@ -802,7 +801,7 @@ kmem_submap(
panic("kmem_submap");
pmap_reference(vm_map_pmap(parent));
- vm_map_setup(map, vm_map_pmap(parent), addr, addr + size, pageable);
+ vm_map_setup(map, vm_map_pmap(parent), addr, addr + size);
kr = vm_map_submap(parent, addr, addr + size, map);
if (kr != KERN_SUCCESS)
panic("kmem_submap");
@@ -821,8 +820,7 @@ void kmem_init(
vm_offset_t start,
vm_offset_t end)
{
- vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end,
- FALSE);
+ vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end);
/*
* Reserve virtual memory allocated up to this time.
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
index fb8ac7f..4bd89c4 100644
--- a/vm/vm_kern.h
+++ b/vm/vm_kern.h
@@ -57,7 +57,7 @@ extern kern_return_t kmem_alloc_aligned(vm_map_t,
vm_offset_t *, vm_size_t);
extern void kmem_free(vm_map_t, vm_offset_t, vm_size_t);
extern void kmem_submap(vm_map_t, vm_map_t, vm_offset_t *,
- vm_offset_t *, vm_size_t, boolean_t);
+ vm_offset_t *, vm_size_t);
extern kern_return_t kmem_io_map_copyout(vm_map_t, vm_offset_t *,
vm_offset_t *, vm_size_t *,
diff --git a/vm/vm_map.c b/vm/vm_map.c
index acac66e..249d18a 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -175,13 +175,11 @@ void vm_map_setup(
vm_map_t map,
pmap_t pmap,
vm_offset_t min,
- vm_offset_t max,
- boolean_t pageable)
+ vm_offset_t max)
{
vm_map_first_entry(map) = vm_map_to_entry(map);
vm_map_last_entry(map) = vm_map_to_entry(map);
map->hdr.nentries = 0;
- map->hdr.entries_pageable = pageable;
rbtree_init(&map->hdr.tree);
rbtree_init(&map->hdr.gap_tree);
@@ -211,8 +209,7 @@ void vm_map_setup(
vm_map_t vm_map_create(
pmap_t pmap,
vm_offset_t min,
- vm_offset_t max,
- boolean_t pageable)
+ vm_offset_t max)
{
vm_map_t result;
@@ -220,11 +217,53 @@ vm_map_t vm_map_create(
if (result == VM_MAP_NULL)
panic("vm_map_create");
- vm_map_setup(result, pmap, min, max, pageable);
+ vm_map_setup(result, pmap, min, max);
return(result);
}
+void vm_map_lock(struct vm_map *map)
+{
+ lock_write(&map->lock);
+
+ /*
+ * XXX Memory allocation may occur while a map is locked,
+ * for example when clipping entries. If the system is running
+ * low on memory, allocating may block until pages are
+ * available. But if a map used by the default pager is
+ * kept locked, a deadlock occurs.
+ *
+ * This workaround temporarily elevates the current thread
+ * VM privileges to avoid that particular deadlock, and does
+ * so regardless of the map for convenience, and because it's
+ * currently impossible to predict which map the default pager
+ * may depend on.
+ *
+ * This workaround isn't reliable, and only makes exhaustion
+ * less likely. In particular pageout may cause lots of data
+ * to be passed between the kernel and the pagers, often
+ * in the form of large copy maps. Making the minimum
+ * number of pages depend on the total number of pages
+ * should make exhaustion even less likely.
+ */
+
+ if (current_thread()) {
+ current_thread()->vm_privilege++;
+ assert(current_thread()->vm_privilege != 0);
+ }
+
+ map->timestamp++;
+}
+
+void vm_map_unlock(struct vm_map *map)
+{
+ if (current_thread()) {
+ current_thread()->vm_privilege--;
+ }
+
+ lock_write_done(&map->lock);
+}
+
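Since the privilege elevation is now a per-thread counter rather than the boolean flag the old entry-creation path saved and restored, nested locking keeps a thread privileged until its outermost unlock. A minimal caller-side sketch, with map_a and map_b standing in for any two maps (hypothetical names, not code from this commit):

	vm_map_lock(map_a);     /* current_thread()->vm_privilege: 0 -> 1 */
	vm_map_lock(map_b);     /*                                 1 -> 2 */
	/* entry allocations may block here; the thread stays privileged */
	vm_map_unlock(map_b);   /*                                 2 -> 1 */
	vm_map_unlock(map_a);   /*                                 1 -> 0 */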
/*
* vm_map_entry_create: [ internal use only ]
*
@@ -241,26 +280,8 @@ vm_map_entry_t _vm_map_entry_create(map_header)
const struct vm_map_header *map_header;
{
vm_map_entry_t entry;
- boolean_t vm_privilege;
- /*
- * XXX Map entry creation may occur while a map is locked,
- * for example when clipping entries. If the system is running
- * low on memory, allocating an entry may block until pages
- * are available. But if a map used by the default pager is
- * kept locked, a deadlock occurs.
- *
- * This workaround temporarily elevates the current thread
- * VM privileges to avoid that particular deadlock, and does
- * so regardless of the map for convenience, and because it's
- * currently impossible to predict which map the default pager
- * may depend on.
- */
- vm_privilege = current_thread()->vm_privilege;
- current_thread()->vm_privilege = TRUE;
entry = (vm_map_entry_t) kmem_cache_alloc(&vm_map_entry_cache);
- current_thread()->vm_privilege = vm_privilege;
-
if (entry == VM_MAP_ENTRY_NULL)
panic("vm_map_entry_create");
@@ -1900,7 +1921,7 @@ vm_map_copy_steal_pages(vm_map_copy_t copy)
* Page was not stolen, get a new
* one and do the copy now.
*/
- while ((new_m = vm_page_grab(FALSE)) == VM_PAGE_NULL) {
+ while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
VM_PAGE_WAIT((void(*)()) 0);
}
@@ -2268,7 +2289,6 @@ start_pass_1:
/*
* XXXO If there are no permanent objects in the destination,
- * XXXO and the source and destination map entry caches match,
* XXXO and the destination map entry is not shared,
* XXXO then the map entries can be deleted and replaced
* XXXO with those from the copy. The following code is the
@@ -2278,8 +2298,7 @@ start_pass_1:
* XXXO to the above pass and make sure that no wiring is involved.
*/
/*
- * if (!contains_permanent_objects &&
- * copy->cpy_hdr.entries_pageable == dst_map->hdr.entries_pageable) {
+ * if (!contains_permanent_objects) {
*
* *
* * Run over copy and adjust entries. Steal code
@@ -2602,48 +2621,6 @@ kern_return_t vm_map_copyout(
}
/*
- * Since we're going to just drop the map
- * entries from the copy into the destination
- * map, they must come from the same pool.
- */
-
- if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
- /*
- * Mismatches occur when dealing with the default
- * pager.
- */
- vm_map_entry_t next, new;
-
- entry = vm_map_copy_first_entry(copy);
-
- /*
- * Reinitialize the copy so that vm_map_copy_entry_link
- * will work.
- */
- copy->cpy_hdr.nentries = 0;
- copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
- rbtree_init(&copy->cpy_hdr.tree);
- rbtree_init(&copy->cpy_hdr.gap_tree);
- vm_map_copy_first_entry(copy) =
- vm_map_copy_last_entry(copy) =
- vm_map_copy_to_entry(copy);
-
- /*
- * Copy each entry.
- */
- while (entry != vm_map_copy_to_entry(copy)) {
- new = vm_map_copy_entry_create(copy);
- vm_map_entry_copy_full(new, entry);
- vm_map_copy_entry_link(copy,
- vm_map_copy_last_entry(copy),
- new);
- next = entry->vme_next;
- kmem_cache_free(&vm_map_entry_cache, (vm_offset_t) entry);
- entry = next;
- }
- }
-
- /*
* Adjust the addresses in the copy chain, and
* reset the region attributes.
*/
@@ -3198,7 +3175,6 @@ kern_return_t vm_map_copyin(
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
copy->cpy_hdr.nentries = 0;
- copy->cpy_hdr.entries_pageable = TRUE;
rbtree_init(&copy->cpy_hdr.tree);
rbtree_init(&copy->cpy_hdr.gap_tree);
@@ -3515,8 +3491,7 @@ kern_return_t vm_map_copyin_object(
/*
* We drop the object into a special copy object
* that contains the object directly. These copy objects
- * are distinguished by entries_pageable == FALSE
- * and null links.
+ * are distinguished by links.
*/
copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
@@ -4156,8 +4131,7 @@ vm_map_t vm_map_fork(vm_map_t old_map)
new_map = vm_map_create(new_pmap,
old_map->min_offset,
- old_map->max_offset,
- old_map->hdr.entries_pageable);
+ old_map->max_offset);
for (
old_entry = vm_map_first_entry(old_map);
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 9e946c5..537c36e 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -153,8 +153,6 @@ struct vm_map_header {
struct rbtree gap_tree; /* Sorted tree of gap lists
for allocations */
int nentries; /* Number of entries */
- boolean_t entries_pageable;
- /* are map entries pageable? */
};
/*
@@ -354,13 +352,9 @@ MACRO_BEGIN \
(map)->timestamp = 0; \
MACRO_END
-#define vm_map_lock(map) \
-MACRO_BEGIN \
- lock_write(&(map)->lock); \
- (map)->timestamp++; \
-MACRO_END
+void vm_map_lock(struct vm_map *map);
+void vm_map_unlock(struct vm_map *map);
-#define vm_map_unlock(map) lock_write_done(&(map)->lock)
#define vm_map_lock_read(map) lock_read(&(map)->lock)
#define vm_map_unlock_read(map) lock_read_done(&(map)->lock)
#define vm_map_lock_write_to_read(map) \
@@ -380,11 +374,9 @@ MACRO_END
extern void vm_map_init(void);
/* Initialize an empty map */
-extern void vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t,
- boolean_t);
+extern void vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t);
/* Create an empty map */
-extern vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t,
- boolean_t);
+extern vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t);
/* Create a map in the image of an existing map */
extern vm_map_t vm_map_fork(vm_map_t);
diff --git a/vm/vm_page.c b/vm/vm_page.c
index a868fce..2a9f27b 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -27,10 +27,13 @@
* multiprocessor systems. When a pool is empty and cannot provide a page,
* it is filled by transferring multiple pages from the backend buddy system.
* The symmetric case is handled likewise.
+ *
+ * TODO Limit number of dirty pages, block allocations above a top limit.
*/
#include <string.h>
#include <kern/assert.h>
+#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/list.h>
@@ -42,6 +45,9 @@
#include <machine/pmap.h>
#include <sys/types.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+#define DEBUG 0
#define __init
#define __initdata
@@ -98,12 +104,96 @@ struct vm_page_free_list {
};
/*
+ * XXX Because of a potential deadlock involving the default pager (see
+ * vm_map_lock()), it's currently impossible to reliably determine the
+ * minimum number of free pages required for successful pageout. Since
+ * that process is dependent on the amount of physical memory, we scale
+ * the minimum number of free pages from it, in the hope that memory
+ * exhaustion happens as rarely as possible...
+ */
+
+/*
+ * Ratio used to compute the minimum number of pages in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN_NUM 5
+#define VM_PAGE_SEG_THRESHOLD_MIN_DENOM 100
+
+/*
+ * Number of pages reserved for privileged allocations in a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_MIN 500
+
+/*
+ * Ratio used to compute the threshold below which pageout is started.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW_NUM 6
+#define VM_PAGE_SEG_THRESHOLD_LOW_DENOM 100
+
+/*
+ * Minimum value the low threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_LOW 600
+
+#if VM_PAGE_SEG_THRESHOLD_LOW <= VM_PAGE_SEG_THRESHOLD_MIN
+#error VM_PAGE_SEG_THRESHOLD_LOW invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_LOW >= VM_PAGE_SEG_THRESHOLD_MIN */
+
+/*
+ * Ratio used to compute the threshold above which pageout is stopped.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH_NUM 10
+#define VM_PAGE_SEG_THRESHOLD_HIGH_DENOM 100
+
+/*
+ * Minimum value the high threshold can have for a segment.
+ */
+#define VM_PAGE_SEG_THRESHOLD_HIGH 1000
+
+#if VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW
+#error VM_PAGE_SEG_THRESHOLD_HIGH invalid
+#endif /* VM_PAGE_SEG_THRESHOLD_HIGH <= VM_PAGE_SEG_THRESHOLD_LOW */
+
+/*
+ * Minimum number of pages allowed for a segment.
+ */
+#define VM_PAGE_SEG_MIN_PAGES 2000
+
+#if VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH
+#error VM_PAGE_SEG_MIN_PAGES invalid
+#endif /* VM_PAGE_SEG_MIN_PAGES <= VM_PAGE_SEG_THRESHOLD_HIGH */
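For a concrete feel of how these ratios and floors combine, assume a 512 MiB segment with 4 KiB pages (an illustration, not a value from this change), so nr_pages = 131072:

	/*
	 *   min_free_pages  = max(131072 *  5 / 100,  500) =  6553
	 *   low_free_pages  = max(131072 *  6 / 100,  600) =  7864
	 *   high_free_pages = max(131072 * 10 / 100, 1000) = 13107
	 *
	 * so unprivileged allocations pause below ~26 MiB free, pageout
	 * starts below ~31 MiB free and stops above ~51 MiB free.
	 */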
+
+/*
+ * Ratio used to compute the threshold of active pages beyond which
+ * to refill the inactive queue.
+ */
+#define VM_PAGE_HIGH_ACTIVE_PAGE_NUM 1
+#define VM_PAGE_HIGH_ACTIVE_PAGE_DENOM 3
+
+/*
+ * Page cache queue.
+ *
+ * XXX The current implementation hardcodes a preference to evict external
+ * pages first and keep internal ones as much as possible. This is because
+ * the Hurd default pager implementation suffers from bugs that can easily
+ * cause the system to freeze.
+ */
+struct vm_page_queue {
+ struct list internal_pages;
+ struct list external_pages;
+};
+
+/*
* Segment name buffer size.
*/
#define VM_PAGE_NAME_SIZE 16
/*
* Segment of contiguous memory.
+ *
+ * XXX Per-segment locking is probably useless, since one or both of the
+ * page queues lock and the free page queue lock is held on any access.
+ * However it should first be made clear which lock protects access to
+ * which members of a segment.
*/
struct vm_page_seg {
struct vm_page_cpu_pool cpu_pools[NCPUS];
@@ -115,6 +205,19 @@ struct vm_page_seg {
simple_lock_data_t lock;
struct vm_page_free_list free_lists[VM_PAGE_NR_FREE_LISTS];
unsigned long nr_free_pages;
+
+ /* Free memory thresholds */
+ unsigned long min_free_pages; /* Privileged allocations only */
+ unsigned long low_free_pages; /* Pageout daemon starts scanning */
+ unsigned long high_free_pages; /* Pageout daemon stops scanning,
+ unprivileged allocations resume */
+
+ /* Page cache related data */
+ struct vm_page_queue active_pages;
+ unsigned long nr_active_pages;
+ unsigned long high_active_pages;
+ struct vm_page_queue inactive_pages;
+ unsigned long nr_inactive_pages;
};
/*
@@ -123,6 +226,7 @@ struct vm_page_seg {
struct vm_page_boot_seg {
phys_addr_t start;
phys_addr_t end;
+ boolean_t heap_present;
phys_addr_t avail_start;
phys_addr_t avail_end;
};
@@ -157,6 +261,16 @@ static struct vm_page_boot_seg vm_page_boot_segs[VM_PAGE_MAX_SEGS] __initdata;
*/
static unsigned int vm_page_segs_size __read_mostly;
+/*
+ * If true, unprivileged allocations are blocked, disregarding any other
+ * condition.
+ *
+ * This variable is also used to resume clients once pages are available.
+ *
+ * The free page queue lock must be held when accessing this variable.
+ */
+static boolean_t vm_page_alloc_paused;
+
static void __init
vm_page_init_pa(struct vm_page *page, unsigned short seg_index, phys_addr_t pa)
{
@@ -180,6 +294,40 @@ vm_page_set_type(struct vm_page *page, unsigned int order, unsigned short type)
page[i].type = type;
}
+static boolean_t
+vm_page_pageable(const struct vm_page *page)
+{
+ return (page->object != NULL)
+ && (page->wire_count == 0)
+ && (page->active || page->inactive);
+}
+
+static boolean_t
+vm_page_can_move(const struct vm_page *page)
+{
+ /*
+ * This function is called on pages pulled from the page queues,
+ * implying they're pageable, which is why the wire count isn't
+ * checked here.
+ */
+
+ return !page->busy
+ && !page->wanted
+ && !page->absent
+ && page->object->alive;
+}
+
+static void
+vm_page_remove_mappings(struct vm_page *page)
+{
+ page->busy = TRUE;
+ pmap_page_protect(page->phys_addr, VM_PROT_NONE);
+
+ if (!page->dirty) {
+ page->dirty = pmap_is_modified(page->phys_addr);
+ }
+}
+
static void __init
vm_page_free_list_init(struct vm_page_free_list *free_list)
{
@@ -216,6 +364,19 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
assert(order < VM_PAGE_NR_FREE_LISTS);
+ if (vm_page_alloc_paused && current_thread()
+ && !current_thread()->vm_privilege) {
+ return NULL;
+ } else if (seg->nr_free_pages <= seg->low_free_pages) {
+ vm_pageout_start();
+
+ if ((seg->nr_free_pages <= seg->min_free_pages)
+ && current_thread() && !current_thread()->vm_privilege) {
+ vm_page_alloc_paused = TRUE;
+ return NULL;
+ }
+ }
+
for (i = order; i < VM_PAGE_NR_FREE_LISTS; i++) {
free_list = &seg->free_lists[i];
@@ -238,6 +399,11 @@ vm_page_seg_alloc_from_buddy(struct vm_page_seg *seg, unsigned int order)
}
seg->nr_free_pages -= (1 << order);
+
+ if (seg->nr_free_pages < seg->min_free_pages) {
+ vm_page_alloc_paused = TRUE;
+ }
+
return page;
}
@@ -361,6 +527,65 @@ vm_page_cpu_pool_drain(struct vm_page_cpu_pool *cpu_pool,
simple_unlock(&seg->lock);
}
+static void
+vm_page_queue_init(struct vm_page_queue *queue)
+{
+ list_init(&queue->internal_pages);
+ list_init(&queue->external_pages);
+}
+
+static void
+vm_page_queue_push(struct vm_page_queue *queue, struct vm_page *page)
+{
+ if (page->external) {
+ list_insert_tail(&queue->external_pages, &page->node);
+ } else {
+ list_insert_tail(&queue->internal_pages, &page->node);
+ }
+}
+
+static void
+vm_page_queue_remove(struct vm_page_queue *queue, struct vm_page *page)
+{
+ (void)queue;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_page_queue_first(struct vm_page_queue *queue, boolean_t external_only)
+{
+ struct vm_page *page;
+
+ if (!list_empty(&queue->external_pages)) {
+ page = list_first_entry(&queue->external_pages, struct vm_page, node);
+ return page;
+ }
+
+ if (!external_only && !list_empty(&queue->internal_pages)) {
+ page = list_first_entry(&queue->internal_pages, struct vm_page, node);
+ return page;
+ }
+
+ return NULL;
+}
+
+static struct vm_page_seg *
+vm_page_seg_get(unsigned short index)
+{
+ assert(index < vm_page_segs_size);
+ return &vm_page_segs[index];
+}
+
+static unsigned int
+vm_page_seg_index(const struct vm_page_seg *seg)
+{
+ unsigned int index;
+
+ index = seg - vm_page_segs;
+ assert(index < vm_page_segs_size);
+ return index;
+}
+
static phys_addr_t __init
vm_page_seg_size(struct vm_page_seg *seg)
{
@@ -383,6 +608,39 @@ vm_page_seg_compute_pool_size(struct vm_page_seg *seg)
}
static void __init
+vm_page_seg_compute_pageout_thresholds(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = vm_page_atop(vm_page_seg_size(seg));
+
+ if (nr_pages < VM_PAGE_SEG_MIN_PAGES) {
+ panic("vm_page: segment too small");
+ }
+
+ seg->min_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_MIN_NUM
+ / VM_PAGE_SEG_THRESHOLD_MIN_DENOM;
+
+ if (seg->min_free_pages < VM_PAGE_SEG_THRESHOLD_MIN) {
+ seg->min_free_pages = VM_PAGE_SEG_THRESHOLD_MIN;
+ }
+
+ seg->low_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_LOW_NUM
+ / VM_PAGE_SEG_THRESHOLD_LOW_DENOM;
+
+ if (seg->low_free_pages < VM_PAGE_SEG_THRESHOLD_LOW) {
+ seg->low_free_pages = VM_PAGE_SEG_THRESHOLD_LOW;
+ }
+
+ seg->high_free_pages = nr_pages * VM_PAGE_SEG_THRESHOLD_HIGH_NUM
+ / VM_PAGE_SEG_THRESHOLD_HIGH_DENOM;
+
+ if (seg->high_free_pages < VM_PAGE_SEG_THRESHOLD_HIGH) {
+ seg->high_free_pages = VM_PAGE_SEG_THRESHOLD_HIGH;
+ }
+}
+
+static void __init
vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
struct vm_page *pages)
{
@@ -405,7 +663,15 @@ vm_page_seg_init(struct vm_page_seg *seg, phys_addr_t start, phys_addr_t end,
vm_page_free_list_init(&seg->free_lists[i]);
seg->nr_free_pages = 0;
- i = seg - vm_page_segs;
+
+ vm_page_seg_compute_pageout_thresholds(seg);
+
+ vm_page_queue_init(&seg->active_pages);
+ seg->nr_active_pages = 0;
+ vm_page_queue_init(&seg->inactive_pages);
+ seg->nr_inactive_pages = 0;
+
+ i = vm_page_seg_index(seg);
for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
vm_page_init_pa(&pages[vm_page_atop(pa - seg->start)], i, pa);
@@ -482,115 +748,637 @@ vm_page_seg_free(struct vm_page_seg *seg, struct vm_page *page,
}
}
-void __init
-vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
- phys_addr_t avail_start, phys_addr_t avail_end)
+static void
+vm_page_seg_add_active_page(struct vm_page_seg *seg, struct vm_page *page)
{
- struct vm_page_boot_seg *seg;
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->active = TRUE;
+ page->reference = TRUE;
+ vm_page_queue_push(&seg->active_pages, page);
+ seg->nr_active_pages++;
+ vm_page_active_count++;
+}
- assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
- assert(vm_page_aligned(start));
- assert(vm_page_aligned(end));
- assert(vm_page_aligned(avail_start));
- assert(vm_page_aligned(avail_end));
- assert(start < end);
- assert(start <= avail_start);
- assert(avail_end <= end);
- assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+static void
+vm_page_seg_remove_active_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && page->active && !page->inactive);
+ page->active = FALSE;
+ vm_page_queue_remove(&seg->active_pages, page);
+ seg->nr_active_pages--;
+ vm_page_active_count--;
+}
- seg = &vm_page_boot_segs[seg_index];
- seg->start = start;
- seg->end = end;
- seg->avail_start = avail_start;
- seg->avail_end = avail_end;
- vm_page_segs_size++;
+static void
+vm_page_seg_add_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
+{
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && !page->inactive);
+ page->inactive = TRUE;
+ vm_page_queue_push(&seg->inactive_pages, page);
+ seg->nr_inactive_pages++;
+ vm_page_inactive_count++;
}
-int
-vm_page_ready(void)
+static void
+vm_page_seg_remove_inactive_page(struct vm_page_seg *seg, struct vm_page *page)
{
- return vm_page_is_ready;
+ assert(page->object != NULL);
+ assert(page->seg_index == vm_page_seg_index(seg));
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+ assert(!page->free && !page->active && page->inactive);
+ page->inactive = FALSE;
+ vm_page_queue_remove(&seg->inactive_pages, page);
+ seg->nr_inactive_pages--;
+ vm_page_inactive_count--;
}
-static unsigned int
-vm_page_select_alloc_seg(unsigned int selector)
+/*
+ * Attempt to pull an active page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_active_page(struct vm_page_seg *seg, boolean_t external_only)
{
- unsigned int seg_index;
+ struct vm_page *page, *first;
+ boolean_t locked;
- switch (selector) {
- case VM_PAGE_SEL_DMA:
- seg_index = VM_PAGE_SEG_DMA;
- break;
- case VM_PAGE_SEL_DMA32:
- seg_index = VM_PAGE_SEG_DMA32;
- break;
- case VM_PAGE_SEL_DIRECTMAP:
- seg_index = VM_PAGE_SEG_DIRECTMAP;
- break;
- case VM_PAGE_SEL_HIGHMEM:
- seg_index = VM_PAGE_SEG_HIGHMEM;
- break;
- default:
- panic("vm_page: invalid selector");
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->active_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_active_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_active_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_active_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
}
- return MIN(vm_page_segs_size - 1, seg_index);
+ return NULL;
}
-static int __init
-vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+/*
+ * Attempt to pull an inactive page.
+ *
+ * If successful, the object containing the page is locked.
+ *
+ * XXX See vm_page_seg_pull_active_page (duplicated code).
+ */
+static struct vm_page *
+vm_page_seg_pull_inactive_page(struct vm_page_seg *seg, boolean_t external_only)
{
- return (seg->end != 0);
+ struct vm_page *page, *first;
+ boolean_t locked;
+
+ first = NULL;
+
+ for (;;) {
+ page = vm_page_queue_first(&seg->inactive_pages, external_only);
+
+ if (page == NULL) {
+ break;
+ } else if (first == NULL) {
+ first = page;
+ } else if (first == page) {
+ break;
+ }
+
+ vm_page_seg_remove_inactive_page(seg, page);
+ locked = vm_object_lock_try(page->object);
+
+ if (!locked) {
+ vm_page_seg_add_inactive_page(seg, page);
+ continue;
+ }
+
+ if (!vm_page_can_move(page)) {
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ continue;
+ }
+
+ return page;
+ }
+
+ return NULL;
}
-static void __init
-vm_page_check_boot_segs(void)
+/*
+ * Attempt to pull a page cache page.
+ *
+ * If successful, the object containing the page is locked.
+ */
+static struct vm_page *
+vm_page_seg_pull_cache_page(struct vm_page_seg *seg,
+ boolean_t external_only,
+ boolean_t *was_active)
{
- unsigned int i;
- int expect_loaded;
+ struct vm_page *page;
- if (vm_page_segs_size == 0)
- panic("vm_page: no physical memory loaded");
+ page = vm_page_seg_pull_inactive_page(seg, external_only);
- for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
- expect_loaded = (i < vm_page_segs_size);
+ if (page != NULL) {
+ *was_active = FALSE;
+ return page;
+ }
- if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
- continue;
+ page = vm_page_seg_pull_active_page(seg, external_only);
- panic("vm_page: invalid boot segment table");
+ if (page != NULL) {
+ *was_active = TRUE;
+ return page;
}
+
+ return NULL;
}
-static phys_addr_t __init
-vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+static boolean_t
+vm_page_seg_min_page_available(const struct vm_page_seg *seg)
{
- return seg->end - seg->start;
+ return (seg->nr_free_pages > seg->min_free_pages);
}
-static phys_addr_t __init
-vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+static boolean_t
+vm_page_seg_page_available(const struct vm_page_seg *seg)
{
- return seg->avail_end - seg->avail_start;
+ return (seg->nr_free_pages > seg->high_free_pages);
}
-unsigned long __init
-vm_page_bootalloc(size_t size)
+static boolean_t
+vm_page_seg_usable(const struct vm_page_seg *seg)
{
- struct vm_page_boot_seg *seg;
- phys_addr_t pa;
- unsigned int i;
+ return (seg->nr_free_pages >= seg->high_free_pages);
+}
- for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
- i < vm_page_segs_size;
- i--) {
- seg = &vm_page_boot_segs[i];
+static void
+vm_page_seg_double_lock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ assert(seg1 != seg2);
- if (size <= vm_page_boot_seg_avail_size(seg)) {
- pa = seg->avail_start;
- seg->avail_start += vm_page_round(size);
- return pa;
- }
+ if (seg1 < seg2) {
+ simple_lock(&seg1->lock);
+ simple_lock(&seg2->lock);
+ } else {
+ simple_lock(&seg2->lock);
+ simple_lock(&seg1->lock);
+ }
+}
+
+static void
+vm_page_seg_double_unlock(struct vm_page_seg *seg1, struct vm_page_seg *seg2)
+{
+ simple_unlock(&seg1->lock);
+ simple_unlock(&seg2->lock);
+}
+
+/*
+ * Attempt to balance a segment by moving one page to another segment.
+ *
+ * Return TRUE if a page was actually moved.
+ */
+static boolean_t
+vm_page_seg_balance_page(struct vm_page_seg *seg,
+ struct vm_page_seg *remote_seg)
+{
+ struct vm_page *src, *dest;
+ vm_object_t object;
+ vm_offset_t offset;
+ boolean_t was_active;
+
+ vm_page_lock_queues();
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_seg_double_lock(seg, remote_seg);
+
+ if (vm_page_seg_usable(seg)
+ || !vm_page_seg_page_available(remote_seg)) {
+ goto error;
+ }
+
+ src = vm_page_seg_pull_cache_page(seg, FALSE, &was_active);
+
+ if (src == NULL) {
+ goto error;
+ }
+
+ assert(src->object != NULL);
+ assert(!src->fictitious && !src->private);
+ assert(src->wire_count == 0);
+ assert(src->type != VM_PT_FREE);
+ assert(src->order == VM_PAGE_ORDER_UNLISTED);
+
+ dest = vm_page_seg_alloc_from_buddy(remote_seg, 0);
+ assert(dest != NULL);
+
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (!was_active && !src->reference && pmap_is_referenced(src->phys_addr)) {
+ src->reference = TRUE;
+ }
+
+ object = src->object;
+ offset = src->offset;
+ vm_page_remove(src);
+
+ vm_page_remove_mappings(src);
+
+ vm_page_set_type(dest, 0, src->type);
+ memcpy(&dest->vm_page_header, &src->vm_page_header,
+ sizeof(*dest) - VM_PAGE_HEADER_SIZE);
+ vm_page_copy(src, dest);
+
+ if (!src->dirty) {
+ pmap_clear_modify(dest->phys_addr);
+ }
+
+ dest->busy = FALSE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_init(src);
+ src->free = TRUE;
+ simple_lock(&seg->lock);
+ vm_page_set_type(src, 0, VM_PT_FREE);
+ vm_page_seg_free_to_buddy(seg, src, 0);
+ simple_unlock(&seg->lock);
+ simple_unlock(&vm_page_queue_free_lock);
+
+ vm_page_insert(dest, object, offset);
+ vm_object_unlock(object);
+
+ if (was_active) {
+ vm_page_activate(dest);
+ } else {
+ vm_page_deactivate(dest);
+ }
+
+ vm_page_unlock_queues();
+
+ return TRUE;
+
+error:
+ vm_page_seg_double_unlock(seg, remote_seg);
+ simple_unlock(&vm_page_queue_free_lock);
+ vm_page_unlock_queues();
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_balance(struct vm_page_seg *seg)
+{
+ struct vm_page_seg *remote_seg;
+ unsigned int i;
+ boolean_t balanced;
+
+ /*
+ * It's important here that pages are moved to lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ remote_seg = vm_page_seg_get(i);
+
+ if (remote_seg == seg) {
+ continue;
+ }
+
+ balanced = vm_page_seg_balance_page(seg, remote_seg);
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_seg_evict(struct vm_page_seg *seg,
+ boolean_t external_only, boolean_t low_memory)
+{
+ struct vm_page *page;
+ boolean_t reclaim, laundry;
+ vm_object_t object;
+ boolean_t was_active;
+
+ page = NULL;
+ object = NULL;
+
+restart:
+ vm_page_lock_queues();
+ simple_lock(&seg->lock);
+
+ if (page != NULL) {
+ vm_object_lock(page->object);
+ } else {
+ page = vm_page_seg_pull_cache_page(seg, external_only, &was_active);
+
+ if (page == NULL) {
+ goto out;
+ }
+ }
+
+ assert(page->object != NULL);
+ assert(!page->fictitious && !page->private);
+ assert(page->wire_count == 0);
+ assert(page->type != VM_PT_FREE);
+ assert(page->order == VM_PAGE_ORDER_UNLISTED);
+
+ object = page->object;
+
+ if (!was_active
+ && (page->reference || pmap_is_referenced(page->phys_addr))) {
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ vm_object_unlock(object);
+ vm_stat.reactivations++;
+ current_task()->reactivations++;
+ vm_page_unlock_queues();
+ page = NULL;
+ goto restart;
+ }
+
+ vm_page_remove_mappings(page);
+
+ if (!page->dirty && !page->precious) {
+ reclaim = TRUE;
+ goto out;
+ }
+
+ reclaim = FALSE;
+
+ /*
+ * If we are very low on memory, then we can't rely on an external
+ * pager to clean a dirty page, because external pagers are not
+ * vm-privileged.
+ *
+ * The laundry bit tells vm_pageout_setup not to do any special
+ * processing of this page since it's immediately going to be
+ * double paged out to the default pager. The laundry bit is
+ * reset and the page is inserted into an internal object by
+ * vm_pageout_setup before the double paging pass.
+ */
+
+ assert(!page->laundry);
+
+ if (object->internal || !low_memory) {
+ laundry = FALSE;
+ } else {
+ laundry = page->laundry = TRUE;
+ }
+
+out:
+ simple_unlock(&seg->lock);
+
+ if (object == NULL) {
+ vm_page_unlock_queues();
+ return FALSE;
+ }
+
+ if (reclaim) {
+ vm_page_free(page);
+ vm_page_unlock_queues();
+
+ if (vm_object_collectable(object)) {
+ vm_object_collect(object);
+ } else {
+ vm_object_unlock(object);
+ }
+
+ return TRUE;
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * If there is no memory object for the page, create one and hand it
+ * to the default pager. First try to collapse, so we don't create
+ * one unnecessarily.
+ */
+
+ if (!object->pager_initialized) {
+ vm_object_collapse(object);
+ }
+
+ if (!object->pager_initialized) {
+ vm_object_pager_create(object);
+ }
+
+ if (!object->pager_initialized) {
+ panic("vm_page_seg_evict");
+ }
+
+ vm_pageout_page(page, FALSE, TRUE); /* flush it */
+ vm_object_unlock(object);
+
+ if (laundry) {
+ goto restart;
+ }
+
+ return TRUE;
+}
+
+static void
+vm_page_seg_compute_high_active_page(struct vm_page_seg *seg)
+{
+ unsigned long nr_pages;
+
+ nr_pages = seg->nr_active_pages + seg->nr_inactive_pages;
+ seg->high_active_pages = nr_pages * VM_PAGE_HIGH_ACTIVE_PAGE_NUM
+ / VM_PAGE_HIGH_ACTIVE_PAGE_DENOM;
+}
+
+static void
+vm_page_seg_refill_inactive(struct vm_page_seg *seg)
+{
+ struct vm_page *page;
+
+ simple_lock(&seg->lock);
+
+ vm_page_seg_compute_high_active_page(seg);
+
+ while (seg->nr_active_pages > seg->high_active_pages) {
+ page = vm_page_seg_pull_active_page(seg, FALSE);
+
+ if (page == NULL) {
+ break;
+ }
+
+ page->reference = FALSE;
+ pmap_clear_reference(page->phys_addr);
+ vm_page_seg_add_inactive_page(seg, page);
+ vm_object_unlock(page->object);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+void __init
+vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start < end);
+ assert(vm_page_segs_size < ARRAY_SIZE(vm_page_boot_segs));
+
+ seg = &vm_page_boot_segs[seg_index];
+ seg->start = start;
+ seg->end = end;
+ seg->heap_present = FALSE;
+
+#if DEBUG
+ printf("vm_page: load: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+
+ vm_page_segs_size++;
+}
+
+void
+vm_page_load_heap(unsigned int seg_index, phys_addr_t start, phys_addr_t end)
+{
+ struct vm_page_boot_seg *seg;
+
+ assert(seg_index < ARRAY_SIZE(vm_page_boot_segs));
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+
+ seg = &vm_page_boot_segs[seg_index];
+
+ assert(seg->start <= start);
+ assert(end <= seg->end);
+
+ seg->avail_start = start;
+ seg->avail_end = end;
+ seg->heap_present = TRUE;
+
+#if DEBUG
+ printf("vm_page: heap: %s: %llx:%llx\n",
+ vm_page_seg_name(seg_index),
+ (unsigned long long)start, (unsigned long long)end);
+#endif
+}
+
+int
+vm_page_ready(void)
+{
+ return vm_page_is_ready;
+}
+
+static unsigned int
+vm_page_select_alloc_seg(unsigned int selector)
+{
+ unsigned int seg_index;
+
+ switch (selector) {
+ case VM_PAGE_SEL_DMA:
+ seg_index = VM_PAGE_SEG_DMA;
+ break;
+ case VM_PAGE_SEL_DMA32:
+ seg_index = VM_PAGE_SEG_DMA32;
+ break;
+ case VM_PAGE_SEL_DIRECTMAP:
+ seg_index = VM_PAGE_SEG_DIRECTMAP;
+ break;
+ case VM_PAGE_SEL_HIGHMEM:
+ seg_index = VM_PAGE_SEG_HIGHMEM;
+ break;
+ default:
+ panic("vm_page: invalid selector");
+ }
+
+ return MIN(vm_page_segs_size - 1, seg_index);
+}
+
+static int __init
+vm_page_boot_seg_loaded(const struct vm_page_boot_seg *seg)
+{
+ return (seg->end != 0);
+}
+
+static void __init
+vm_page_check_boot_segs(void)
+{
+ unsigned int i;
+ int expect_loaded;
+
+ if (vm_page_segs_size == 0)
+ panic("vm_page: no physical memory loaded");
+
+ for (i = 0; i < ARRAY_SIZE(vm_page_boot_segs); i++) {
+ expect_loaded = (i < vm_page_segs_size);
+
+ if (vm_page_boot_seg_loaded(&vm_page_boot_segs[i]) == expect_loaded)
+ continue;
+
+ panic("vm_page: invalid boot segment table");
+ }
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_size(struct vm_page_boot_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static phys_addr_t __init
+vm_page_boot_seg_avail_size(struct vm_page_boot_seg *seg)
+{
+ return seg->avail_end - seg->avail_start;
+}
+
+unsigned long __init
+vm_page_bootalloc(size_t size)
+{
+ struct vm_page_boot_seg *seg;
+ phys_addr_t pa;
+ unsigned int i;
+
+ for (i = vm_page_select_alloc_seg(VM_PAGE_SEL_DIRECTMAP);
+ i < vm_page_segs_size;
+ i--) {
+ seg = &vm_page_boot_segs[i];
+
+ if (size <= vm_page_boot_seg_avail_size(seg)) {
+ pa = seg->avail_start;
+ seg->avail_start += vm_page_round(size);
+ return pa;
+ }
}
panic("vm_page: no physical memory available");
@@ -683,21 +1471,92 @@ vm_page_lookup_pa(phys_addr_t pa)
return NULL;
}
-struct vm_page *
-vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+static struct vm_page_seg *
+vm_page_lookup_seg(const struct vm_page *page)
{
- struct vm_page *page;
+ struct vm_page_seg *seg;
unsigned int i;
- for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
- page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
- if (page != NULL)
- return page;
+ if ((page->phys_addr >= seg->start) && (page->phys_addr < seg->end)) {
+ return seg;
+ }
}
- if (type == VM_PT_PMAP)
- panic("vm_page: unable to allocate pmap page");
+ return NULL;
+}
+
+void vm_page_check(const struct vm_page *page)
+{
+ if (page->fictitious) {
+ if (page->private) {
+ panic("vm_page: page both fictitious and private");
+ }
+
+ if (page->phys_addr != vm_page_fictitious_addr) {
+ panic("vm_page: invalid fictitious page");
+ }
+ } else {
+ struct vm_page_seg *seg;
+
+ if (page->phys_addr == vm_page_fictitious_addr) {
+ panic("vm_page: real page has fictitious address");
+ }
+
+ seg = vm_page_lookup_seg(page);
+
+ if (seg == NULL) {
+ if (!page->private) {
+ panic("vm_page: page claims it's managed but not in any
segment");
+ }
+ } else {
+ if (page->private) {
+ struct vm_page *real_page;
+
+ if (vm_page_pageable(page)) {
+ panic("vm_page: private page is pageable");
+ }
+
+ real_page = vm_page_lookup_pa(page->phys_addr);
+
+ if (vm_page_pageable(real_page)) {
+ panic("vm_page: page underlying private page is pageable");
+ }
+
+ if ((real_page->type == VM_PT_FREE)
+ || (real_page->order != VM_PAGE_ORDER_UNLISTED)) {
+ panic("vm_page: page underlying private pagei is free");
+ }
+ } else {
+ unsigned int index;
+
+ index = vm_page_seg_index(seg);
+
+ if (index != page->seg_index) {
+ panic("vm_page: page segment mismatch");
+ }
+ }
+ }
+ }
+}
+
+struct vm_page *
+vm_page_alloc_pa(unsigned int order, unsigned int selector, unsigned short type)
+{
+ struct vm_page *page;
+ unsigned int i;
+
+ for (i = vm_page_select_alloc_seg(selector); i < vm_page_segs_size; i--) {
+ page = vm_page_seg_alloc(&vm_page_segs[i], order, type);
+
+ if (page != NULL)
+ return page;
+ }
+
+ if (!current_thread() || current_thread()->vm_privilege)
+ panic("vm_page: privileged thread unable to allocate page");
return NULL;
}
@@ -740,10 +1599,75 @@ vm_page_info_all(void)
printf("vm_page: %s: pages: %lu (%luM), free: %lu (%luM)\n",
vm_page_seg_name(i), pages, pages >> (20 - PAGE_SHIFT),
seg->nr_free_pages, seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ printf("vm_page: %s: min:%lu low:%lu high:%lu\n",
+ vm_page_seg_name(vm_page_seg_index(seg)),
+ seg->min_free_pages, seg->low_free_pages, seg->high_free_pages);
}
}
phys_addr_t
+vm_page_seg_end(unsigned int selector)
+{
+ return vm_page_segs[vm_page_select_alloc_seg(selector)].end;
+}
+
+static unsigned long
+vm_page_boot_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_boot_seg_size(&vm_page_boot_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_size(void)
+{
+ unsigned long nr_pages;
+ unsigned int i;
+
+ if (!vm_page_is_ready) {
+ return vm_page_boot_table_size();
+ }
+
+ nr_pages = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ nr_pages += vm_page_atop(vm_page_seg_size(&vm_page_segs[i]));
+ }
+
+ return nr_pages;
+}
+
+unsigned long
+vm_page_table_index(phys_addr_t pa)
+{
+ struct vm_page_seg *seg;
+ unsigned long index;
+ unsigned int i;
+
+ index = 0;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = &vm_page_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end)) {
+ return index + vm_page_atop(pa - seg->start);
+ }
+
+ index += vm_page_atop(vm_page_seg_size(seg));
+ }
+
+ panic("vm_page: invalid physical address");
+}
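As a worked example of the index computation (the segment boundaries here are invented for illustration): with a DMA segment covering [0, 16 MiB) followed by a DIRECTMAP segment, a physical address of 20 MiB with 4 KiB pages yields

	/*
	 *   index = atop(16 MiB) + atop(20 MiB - 16 MiB)
	 *         = 4096 + 1024
	 *         = 5120
	 */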
+
+phys_addr_t
vm_page_mem_size(void)
{
phys_addr_t total;
@@ -752,10 +1676,6 @@ vm_page_mem_size(void)
total = 0;
for (i = 0; i < vm_page_segs_size; i++) {
- /* XXX */
- if (i > VM_PAGE_SEG_DIRECTMAP)
- continue;
-
total += vm_page_seg_size(&vm_page_segs[i]);
}
@@ -771,12 +1691,413 @@ vm_page_mem_free(void)
total = 0;
for (i = 0; i < vm_page_segs_size; i++) {
- /* XXX */
- if (i > VM_PAGE_SEG_DIRECTMAP)
- continue;
-
total += vm_page_segs[i].nr_free_pages;
}
return total;
}
+
+/*
+ * Mark this page as wired down by yet another map, removing it
+ * from paging queues as necessary.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_wire(struct vm_page *page)
+{
+ VM_PAGE_CHECK(page);
+
+ if (page->wire_count == 0) {
+ vm_page_queues_remove(page);
+
+ if (!page->private && !page->fictitious) {
+ vm_page_wire_count++;
+ }
+ }
+
+ page->wire_count++;
+}
+
+/*
+ * Release one wiring of this page, potentially enabling it to be paged again.
+ *
+ * The page's object and the page queues must be locked.
+ */
+void
+vm_page_unwire(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ assert(page->wire_count != 0);
+ page->wire_count--;
+
+ if ((page->wire_count != 0)
+ || page->fictitious
+ || page->private) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+
+ vm_page_wire_count--;
+}
+
+/*
+ * Returns the given page to the inactive list, indicating that
+ * no physical maps have access to this page.
+ * [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * This page is no longer very interesting. If it was
+ * interesting (active or inactive/referenced), then we
+ * clear the reference bit and (re)enter it in the
+ * inactive queue. Note wired pages should not have
+ * their reference bit cleared.
+ */
+
+ if (page->active || (page->inactive && page->reference)) {
+ if (!page->fictitious && !page->private && !page->absent) {
+ pmap_clear_reference(page->phys_addr);
+ }
+
+ page->reference = FALSE;
+ vm_page_queues_remove(page);
+ }
+
+ if ((page->wire_count == 0) && !page->fictitious
+ && !page->private && !page->inactive) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_inactive_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+/*
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_activate(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ VM_PAGE_CHECK(page);
+
+ /*
+ * Unconditionally remove so that, even if the page was already
+ * active, it gets back to the end of the active queue.
+ */
+ vm_page_queues_remove(page);
+
+ if ((page->wire_count == 0) && !page->fictitious && !page->private) {
+ seg = vm_page_seg_get(page->seg_index);
+
+ if (page->active)
+ panic("vm_page_activate: already active");
+
+ simple_lock(&seg->lock);
+ vm_page_seg_add_active_page(seg, page);
+ simple_unlock(&seg->lock);
+ }
+}
+
+void
+vm_page_queues_remove(struct vm_page *page)
+{
+ struct vm_page_seg *seg;
+
+ assert(!page->active || !page->inactive);
+
+ if (!page->active && !page->inactive) {
+ return;
+ }
+
+ seg = vm_page_seg_get(page->seg_index);
+
+ simple_lock(&seg->lock);
+
+ if (page->active) {
+ vm_page_seg_remove_active_page(seg, page);
+ } else {
+ vm_page_seg_remove_inactive_page(seg, page);
+ }
+
+ simple_unlock(&seg->lock);
+}
+
+/*
+ * Check whether segments are all usable for unprivileged allocations.
+ *
+ * If all segments are usable, resume pending unprivileged allocations
+ * and return TRUE.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+static boolean_t
+vm_page_check_usable(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t usable;
+ unsigned int i;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ usable = vm_page_seg_usable(seg);
+ simple_unlock(&seg->lock);
+
+ if (!usable) {
+ return FALSE;
+ }
+ }
+
+ vm_page_external_pagedout = -1;
+ vm_page_alloc_paused = FALSE;
+ thread_wakeup(&vm_page_alloc_paused);
+ return TRUE;
+}
+
+static boolean_t
+vm_page_may_balance(void)
+{
+ struct vm_page_seg *seg;
+ boolean_t page_available;
+ unsigned int i;
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ page_available = vm_page_seg_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (page_available) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+static boolean_t
+vm_page_balance_once(void)
+{
+ boolean_t balanced;
+ unsigned int i;
+
+ /*
+ * It's important here that pages are moved from higher priority
+ * segments first.
+ */
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ balanced = vm_page_seg_balance(vm_page_seg_get(i));
+
+ if (balanced) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+boolean_t
+vm_page_balance(void)
+{
+ boolean_t balanced;
+
+ while (vm_page_may_balance()) {
+ balanced = vm_page_balance_once();
+
+ if (!balanced) {
+ break;
+ }
+ }
+
+ return vm_page_check_usable();
+}
+
+static boolean_t
+vm_page_evict_once(boolean_t external_only)
+{
+ struct vm_page_seg *seg;
+ boolean_t low_memory, min_page_available, evicted;
+ unsigned int i;
+
+ /*
+ * XXX Page allocation currently only uses the DIRECTMAP selector,
+ * allowing us to know which segments to look at when determining
+ * whether we're very low on memory.
+ */
+ low_memory = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ if (i > VM_PAGE_SEG_DIRECTMAP) {
+ break;
+ }
+
+ seg = vm_page_seg_get(i);
+
+ simple_lock(&seg->lock);
+ min_page_available = vm_page_seg_min_page_available(seg);
+ simple_unlock(&seg->lock);
+
+ if (min_page_available) {
+ low_memory = FALSE;
+ break;
+ }
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * It's important here that pages are evicted from lower priority
+ * segments first.
+ */
+
+ for (i = vm_page_segs_size - 1; i < vm_page_segs_size; i--) {
+ evicted = vm_page_seg_evict(vm_page_seg_get(i),
+ external_only, low_memory);
+
+ if (evicted) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+#define VM_PAGE_MAX_LAUNDRY 5
+#define VM_PAGE_MAX_EVICTIONS 5
+
+boolean_t
+vm_page_evict(boolean_t *should_wait)
+{
+ boolean_t pause, evicted, external_only;
+ unsigned int i;
+
+ *should_wait = TRUE;
+ external_only = TRUE;
+
+ simple_lock(&vm_page_queue_free_lock);
+ vm_page_external_pagedout = 0;
+ simple_unlock(&vm_page_queue_free_lock);
+
+again:
+ vm_page_lock_queues();
+ pause = (vm_page_laundry_count >= VM_PAGE_MAX_LAUNDRY);
+ vm_page_unlock_queues();
+
+ if (pause) {
+ simple_lock(&vm_page_queue_free_lock);
+ return FALSE;
+ }
+
+ for (i = 0; i < VM_PAGE_MAX_EVICTIONS; i++) {
+ evicted = vm_page_evict_once(external_only);
+
+ if (!evicted) {
+ break;
+ }
+ }
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ /*
+ * Keep in mind eviction may not cause pageouts, since non-precious
+ * clean pages are simply released.
+ */
+ if ((vm_page_external_pagedout == 0) || (vm_page_laundry_count == 0)) {
+ /*
+ * No pageout, but some clean pages were freed. Start a complete
+ * scan again without waiting.
+ */
+ if (evicted) {
+ *should_wait = FALSE;
+ return FALSE;
+ }
+
+ /*
+ * Eviction failed, consider pages from internal objects on the
+ * next attempt.
+ */
+ if (external_only) {
+ simple_unlock(&vm_page_queue_free_lock);
+ external_only = FALSE;
+ goto again;
+ }
+
+ /*
+ * TODO Find out what could cause this and how to deal with it.
+ * This will likely require an out-of-memory killer.
+ */
+ panic("vm_page: unable to recycle any page");
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ return vm_page_check_usable();
+}
+
+void
+vm_page_refill_inactive(void)
+{
+ unsigned int i;
+
+ vm_page_lock_queues();
+
+ for (i = 0; i < vm_page_segs_size; i++) {
+ vm_page_seg_refill_inactive(vm_page_seg_get(i));
+ }
+
+ vm_page_unlock_queues();
+}
+
+void
+vm_page_wait(void (*continuation)(void))
+{
+ assert(!current_thread()->vm_privilege);
+
+ simple_lock(&vm_page_queue_free_lock);
+
+ if (!vm_page_alloc_paused) {
+ simple_unlock(&vm_page_queue_free_lock);
+ return;
+ }
+
+ assert_wait(&vm_page_alloc_paused, FALSE);
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ if (continuation != 0) {
+ counter(c_vm_page_wait_block_user++);
+ thread_block(continuation);
+ } else {
+ counter(c_vm_page_wait_block_kernel++);
+ thread_block((void (*)(void)) 0);
+ }
+}
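The pause/resume protocol above (vm_page_alloc_paused, assert_wait, thread_wakeup) is what the usual unprivileged allocation loop relies on. A minimal caller-side sketch using only interfaces declared in this change, passing a null continuation for simplicity; since vm_page_wait asserts the caller is not VM-privileged, this pattern is for unprivileged allocators only:

	struct vm_page *m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		vm_page_wait((void (*)(void)) 0);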
diff --git a/vm/vm_page.h b/vm/vm_page.h
index f2e20a7..eb684c1 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -40,6 +40,7 @@
#include <vm/vm_object.h>
#include <vm/vm_types.h>
#include <kern/queue.h>
+#include <kern/list.h>
#include <kern/lock.h>
#include <kern/log2.h>
@@ -77,8 +78,7 @@
*/
struct vm_page {
- /* Members used in the vm_page module only */
- struct list node;
+ struct list node; /* page queues or free list (P) */
unsigned short type;
unsigned short seg_index;
unsigned short order;
@@ -90,15 +90,13 @@ struct vm_page {
*/
phys_addr_t phys_addr;
+ queue_chain_t listq; /* all pages in same object (O) */
+ struct vm_page *next; /* VP bucket link (O) */
+
/* We use an empty struct as the delimiter. */
struct {} vm_page_header;
#define VM_PAGE_HEADER_SIZE offsetof(struct vm_page, vm_page_header)
- queue_chain_t pageq; /* queue info for FIFO
- * queue or free list (P) */
- queue_chain_t listq; /* all pages in same object (O) */
- struct vm_page *next; /* VP bucket link (O) */
-
vm_object_t object; /* which object am I in (O,P) */
vm_offset_t offset; /* offset into that object (O,P) */
@@ -109,8 +107,7 @@ struct vm_page {
laundry:1, /* page is being cleaned now (P)*/
free:1, /* page is on free list (P) */
reference:1, /* page has been used (P) */
- external:1, /* page considered external (P) */
- extcounted:1, /* page counted in ext counts (P) */
+ external:1, /* page in external object (P) */
busy:1, /* page is in transit (O) */
wanted:1, /* someone is waiting for page (O) */
tabled:1, /* page is in VP table (O) */
@@ -137,7 +134,9 @@ struct vm_page {
* some useful check on a page structure.
*/
-#define VM_PAGE_CHECK(mem)
+#define VM_PAGE_CHECK(mem) vm_page_check(mem)
+
+void vm_page_check(const struct vm_page *page);
/*
* Each pageable resident page falls into one of three lists:
@@ -156,13 +155,6 @@ struct vm_page {
*/
extern
-vm_page_t vm_page_queue_fictitious; /* fictitious free queue */
-extern
-queue_head_t vm_page_queue_active; /* active memory queue */
-extern
-queue_head_t vm_page_queue_inactive; /* inactive memory queue */
-
-extern
int vm_page_fictitious_count;/* How many fictitious pages are free? */
extern
int vm_page_active_count; /* How many pages are active? */
@@ -171,36 +163,16 @@ int vm_page_inactive_count; /* How many pages are inactive? */
extern
int vm_page_wire_count; /* How many pages are wired? */
extern
-int vm_page_free_target; /* How many do we want free? */
-extern
-int vm_page_free_min; /* When to wakeup pageout */
-extern
-int vm_page_inactive_target;/* How many do we want inactive? */
-extern
-int vm_page_free_reserved; /* How many pages reserved to do pageout */
-extern
int vm_page_laundry_count; /* How many pages being laundered? */
extern
-int vm_page_external_limit; /* Max number of pages for external objects */
-
-/* Only objects marked with the extcounted bit are included in this total.
- Pages which we scan for possible pageout, but which are not actually
- dirty, don't get considered against the external page limits any more
- in this way. */
-extern
-int vm_page_external_count; /* How many pages for external objects? */
-
-
+int vm_page_external_pagedout; /* How many external pages being paged out? */
decl_simple_lock_data(extern,vm_page_queue_lock)/* lock on active and inactive
page queues */
decl_simple_lock_data(extern,vm_page_queue_free_lock)
/* lock on free page queue */
-extern unsigned int vm_page_free_wanted;
- /* how many threads are waiting for memory */
-
-extern vm_offset_t vm_page_fictitious_addr;
+extern phys_addr_t vm_page_fictitious_addr;
/* (fake) phys_addr of fictitious pages */
extern void vm_page_bootstrap(
@@ -212,9 +184,11 @@ extern vm_page_t vm_page_lookup(
vm_object_t object,
vm_offset_t offset);
extern vm_page_t vm_page_grab_fictitious(void);
-extern boolean_t vm_page_convert(vm_page_t *, boolean_t);
+extern boolean_t vm_page_convert(vm_page_t *);
extern void vm_page_more_fictitious(void);
-extern vm_page_t vm_page_grab(boolean_t);
+extern vm_page_t vm_page_grab(void);
+extern void vm_page_release(vm_page_t, boolean_t, boolean_t);
+extern phys_addr_t vm_page_grab_phys_addr(void);
extern vm_page_t vm_page_grab_contig(vm_size_t, unsigned int);
extern void vm_page_free_contig(vm_page_t, vm_size_t);
extern void vm_page_wait(void (*)(void));
@@ -303,22 +277,7 @@ extern unsigned int vm_page_info(
#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
-#define VM_PAGE_QUEUES_REMOVE(mem) \
- MACRO_BEGIN \
- if (mem->active) { \
- queue_remove(&vm_page_queue_active, \
- mem, vm_page_t, pageq); \
- mem->active = FALSE; \
- vm_page_active_count--; \
- } \
- \
- if (mem->inactive) { \
- queue_remove(&vm_page_queue_inactive, \
- mem, vm_page_t, pageq); \
- mem->inactive = FALSE; \
- vm_page_inactive_count--; \
- } \
- MACRO_END
+#define VM_PAGE_QUEUES_REMOVE(mem) vm_page_queues_remove(mem)
/*
* Copyright (c) 2010-2014 Richard Braun.
@@ -367,18 +326,11 @@ extern unsigned int vm_page_info(
/*
* Page usage types.
- *
- * Failing to allocate pmap pages will cause a kernel panic.
- * TODO Obviously, this needs to be addressed, e.g. with a reserved pool of
- * pages.
*/
#define VM_PT_FREE 0 /* Page unused */
#define VM_PT_RESERVED 1 /* Page reserved at boot time */
#define VM_PT_TABLE 2 /* Page is part of the page table */
-#define VM_PT_PMAP 3 /* Page stores pmap-specific data */
-#define VM_PT_KMEM 4 /* Page is part of a kmem slab */
-#define VM_PT_STACK 5 /* Type for generic kernel allocations */
-#define VM_PT_KERNEL 6 /* Type for generic kernel allocations */
+#define VM_PT_KERNEL 3 /* Type for generic kernel allocations */
static inline unsigned short
vm_page_type(const struct vm_page *page)
@@ -401,29 +353,6 @@ vm_page_to_pa(const struct vm_page *page)
return page->phys_addr;
}
-#if 0
-static inline unsigned long
-vm_page_direct_va(phys_addr_t pa)
-{
- assert(pa < VM_PAGE_DIRECTMAP_LIMIT);
- return ((unsigned long)pa + VM_MIN_DIRECTMAP_ADDRESS);
-}
-
-static inline phys_addr_t
-vm_page_direct_pa(unsigned long va)
-{
- assert(va >= VM_MIN_DIRECTMAP_ADDRESS);
- assert(va < VM_MAX_DIRECTMAP_ADDRESS);
- return (va - VM_MIN_DIRECTMAP_ADDRESS);
-}
-
-static inline void *
-vm_page_direct_ptr(const struct vm_page *page)
-{
- return (void *)vm_page_direct_va(vm_page_to_pa(page));
-}
-#endif
-
/*
* Associate private data with a page.
*/
@@ -442,13 +371,18 @@ vm_page_get_priv(const struct vm_page *page)
/*
* Load physical memory into the vm_page module at boot time.
*
- * The avail_start and avail_end parameters are used to maintain a simple
- * heap for bootstrap allocations.
- *
* All addresses must be page-aligned. Segments can be loaded in any order.
*/
-void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end,
- phys_addr_t avail_start, phys_addr_t avail_end);
+void vm_page_load(unsigned int seg_index, phys_addr_t start, phys_addr_t end);
+
+/*
+ * Load available physical memory into the vm_page module at boot time.
+ *
+ * The segment referred to must have been loaded with vm_page_load
+ * before loading its heap.
+ */
+void vm_page_load_heap(unsigned int seg_index, phys_addr_t start,
+ phys_addr_t end);
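A hypothetical boot-time usage sketch of the two-step loading interface described above. The addresses are made-up placeholders, and VM_PAGE_SEG_DIRECTMAP is assumed here as the name of a segment index; only the calling order (vm_page_load before vm_page_load_heap) is taken from the comments.

    /* Sketch only: register a segment, then its allocatable heap. */
    void
    example_load_directmap_segment(void)
    {
            /* Whole physical range covered by the segment. */
            vm_page_load(VM_PAGE_SEG_DIRECTMAP, 0x01000000, 0x10000000);

            /* Part of it that is actually free for the allocator,
             * e.g. excluding pages already claimed during early boot. */
            vm_page_load_heap(VM_PAGE_SEG_DIRECTMAP, 0x01200000, 0x10000000);
    }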
/*
* Return true if the vm_page module is completely initialized, false
@@ -521,6 +455,21 @@ const char * vm_page_seg_name(unsigned int seg_index);
void vm_page_info_all(void);
/*
+ * Return the maximum physical address for a given segment selector.
+ */
+phys_addr_t vm_page_seg_end(unsigned int selector);
+
+/*
+ * Return the total number of physical pages.
+ */
+unsigned long vm_page_table_size(void);
+
+/*
+ * Return the index of a page in the page table.
+ */
+unsigned long vm_page_table_index(phys_addr_t pa);
+
+/*
* Return the total amount of physical memory.
*/
phys_addr_t vm_page_mem_size(void);
@@ -533,4 +482,53 @@ phys_addr_t vm_page_mem_size(void);
*/
unsigned long vm_page_mem_free(void);
+/*
+ * Remove the given page from any page queue it might be in.
+ */
+void vm_page_queues_remove(struct vm_page *page);
+
+/*
+ * Balance physical pages among segments.
+ *
+ * This function should be called first by the pageout daemon
+ * on memory pressure, since balancing alone may free enough pages
+ * to make any other operation, including shrinking caches,
+ * unnecessary.
+ *
+ * Return TRUE if balancing made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_balance(void);
+
+/*
+ * Evict physical pages.
+ *
+ * This function should be called by the pageout daemon after balancing
+ * the segments and shrinking kernel caches.
+ *
+ * Return TRUE if eviction made enough free pages for unprivileged
+ * allocations to succeed, in which case pending allocations are resumed.
+ *
+ * Otherwise, report whether the pageout daemon should wait (some pages
+ * have been paged out) or not (only clean pages have been released).
+ *
+ * This function acquires vm_page_queue_free_lock, which is held on return.
+ */
+boolean_t vm_page_evict(boolean_t *should_wait);
+
+/*
+ * Turn active pages into inactive ones for second-chance LRU
+ * approximation.
+ *
+ * This function should be called by the pageout daemon on memory pressure,
+ * i.e. right before evicting pages.
+ *
+ * XXX This is probably not the best strategy, compared to keeping the
+ * active/inactive ratio in check at all times, but this means less
+ * frequent refills.
+ */
+void vm_page_refill_inactive(void);
+
#endif /* _VM_VM_PAGE_H_ */
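Taken together, the three declarations above define the sequence the pageout daemon is expected to follow under memory pressure. A condensed sketch of that contract follows; the real flow is the reworked vm_pageout_scan further down in this diff, so this is only a summary of the locking and ordering rules stated in the comments.

    /* Condensed sketch of the intended call order under memory pressure.
     * Both vm_page_balance() and vm_page_evict() return with
     * vm_page_queue_free_lock held. */
    static boolean_t
    example_pageout_pass(boolean_t *should_wait)
    {
            if (vm_page_balance())
                    return TRUE;            /* balancing freed enough pages */

            simple_unlock(&vm_page_queue_free_lock);

            /* Shrink kernel caches here (stack_collect, slab_collect, ...),
             * then refill the inactive queues before evicting. */
            vm_page_refill_inactive();

            return vm_page_evict(should_wait);
    }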
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index f420804..dd0f995 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -53,140 +53,17 @@
#include <vm/vm_pageout.h>
#include <machine/locore.h>
-
-
-#ifndef VM_PAGEOUT_BURST_MAX
-#define VM_PAGEOUT_BURST_MAX 10 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MAX */
-
-#ifndef VM_PAGEOUT_BURST_MIN
-#define VM_PAGEOUT_BURST_MIN 5 /* number of pages */
-#endif /* VM_PAGEOUT_BURST_MIN */
-
-#ifndef VM_PAGEOUT_BURST_WAIT
-#define VM_PAGEOUT_BURST_WAIT 10 /* milliseconds per page */
-#endif /* VM_PAGEOUT_BURST_WAIT */
-
-#ifndef VM_PAGEOUT_EMPTY_WAIT
-#define VM_PAGEOUT_EMPTY_WAIT 75 /* milliseconds */
-#endif /* VM_PAGEOUT_EMPTY_WAIT */
-
-#ifndef VM_PAGEOUT_PAUSE_MAX
-#define VM_PAGEOUT_PAUSE_MAX 10 /* number of pauses */
-#endif /* VM_PAGEOUT_PAUSE_MAX */
-
-/*
- * To obtain a reasonable LRU approximation, the inactive queue
- * needs to be large enough to give pages on it a chance to be
- * referenced a second time. This macro defines the fraction
- * of active+inactive pages that should be inactive.
- * The pageout daemon uses it to update vm_page_inactive_target.
- *
- * If the number of free pages falls below vm_page_free_target and
- * vm_page_inactive_count is below vm_page_inactive_target,
- * then the pageout daemon starts running.
- */
-
-#ifndef VM_PAGE_INACTIVE_TARGET
-#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 2 / 3)
-#endif /* VM_PAGE_INACTIVE_TARGET */
-
/*
- * Once the pageout daemon starts running, it keeps going
- * until the number of free pages meets or exceeds vm_page_free_target.
+ * Event placeholder for pageout requests, synchronized with
+ * the free page queue lock.
*/
-
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET(free) (150 + (free) * 10 / 100)
-#endif /* VM_PAGE_FREE_TARGET */
+static int vm_pageout_requested;
/*
- * The pageout daemon always starts running once the number of free pages
- * falls below vm_page_free_min.
- */
-
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN(free) (100 + (free) * 8 / 100)
-#endif /* VM_PAGE_FREE_MIN */
-
-/* When vm_page_external_count exceeds vm_page_external_limit,
- * allocations of externally paged pages stops.
- */
-
-#ifndef VM_PAGE_EXTERNAL_LIMIT
-#define VM_PAGE_EXTERNAL_LIMIT(free) ((free) / 2)
-#endif /* VM_PAGE_EXTERNAL_LIMIT */
-
-/* Attempt to keep the number of externally paged pages less
- * than vm_pages_external_target.
+ * Event placeholder for pageout throttling, synchronized with
+ * the free page queue lock.
*/
-#ifndef VM_PAGE_EXTERNAL_TARGET
-#define VM_PAGE_EXTERNAL_TARGET(free) ((free) / 4)
-#endif /* VM_PAGE_EXTERNAL_TARGET */
-
-/*
- * When the number of free pages falls below vm_page_free_reserved,
- * only vm-privileged threads can allocate pages. vm-privilege
- * allows the pageout daemon and default pager (and any other
- * associated threads needed for default pageout) to continue
- * operation by dipping into the reserved pool of pages. */
-
-#ifndef VM_PAGE_FREE_RESERVED
-#define VM_PAGE_FREE_RESERVED 500
-#endif /* VM_PAGE_FREE_RESERVED */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_internal,
- * the pageout daemon no longer trusts external pagers to clean pages.
- * External pagers are probably all wedged waiting for a free page.
- * It forcibly double-pages dirty pages belonging to external objects,
- * getting the pages to the default pager to clean.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_INTERNAL
-#define VM_PAGEOUT_RESERVED_INTERNAL(reserve) ((reserve) - 250)
-#endif /* VM_PAGEOUT_RESERVED_INTERNAL */
-
-/*
- * When the number of free pages falls below vm_pageout_reserved_really,
- * the pageout daemon stops work entirely to let the default pager
- * catch up (assuming the default pager has pages to clean).
- * Beyond this point, it is too dangerous to consume memory
- * even for memory_object_data_write messages to the default pager.
- */
-
-#ifndef VM_PAGEOUT_RESERVED_REALLY
-#define VM_PAGEOUT_RESERVED_REALLY(reserve) ((reserve) - 400)
-#endif /* VM_PAGEOUT_RESERVED_REALLY */
-
-unsigned int vm_pageout_reserved_internal = 0;
-unsigned int vm_pageout_reserved_really = 0;
-
-unsigned int vm_page_external_target = 0;
-
-unsigned int vm_pageout_burst_max = 0;
-unsigned int vm_pageout_burst_min = 0;
-unsigned int vm_pageout_burst_wait = 0; /* milliseconds per page */
-unsigned int vm_pageout_empty_wait = 0; /* milliseconds */
-unsigned int vm_pageout_pause_count = 0;
-unsigned int vm_pageout_pause_max = 0;
-
-/*
- * These variables record the pageout daemon's actions:
- * how many pages it looks at and what happens to those pages.
- * No locking needed because only one thread modifies the variables.
- */
-
-unsigned int vm_pageout_active = 0; /* debugging */
-unsigned int vm_pageout_inactive = 0; /* debugging */
-unsigned int vm_pageout_inactive_nolock = 0; /* debugging */
-unsigned int vm_pageout_inactive_busy = 0; /* debugging */
-unsigned int vm_pageout_inactive_absent = 0; /* debugging */
-unsigned int vm_pageout_inactive_used = 0; /* debugging */
-unsigned int vm_pageout_inactive_clean = 0; /* debugging */
-unsigned int vm_pageout_inactive_dirty = 0; /* debugging */
-unsigned int vm_pageout_inactive_double = 0; /* debugging */
-unsigned int vm_pageout_inactive_cleaned_external = 0;
+static int vm_pageout_continue;
/*
* Routine: vm_pageout_setup
@@ -241,15 +118,20 @@ vm_pageout_setup(
/*
* If we are not flushing the page, allocate a
- * page in the object. If we cannot get the
- * page, flush instead.
+ * page in the object.
*/
if (!flush) {
- vm_object_lock(new_object);
- new_m = vm_page_alloc(new_object, new_offset);
- if (new_m == VM_PAGE_NULL)
- flush = TRUE;
- vm_object_unlock(new_object);
+ for (;;) {
+ vm_object_lock(new_object);
+ new_m = vm_page_alloc(new_object, new_offset);
+ vm_object_unlock(new_object);
+
+ if (new_m != VM_PAGE_NULL) {
+ break;
+ }
+
+ VM_PAGE_WAIT(NULL);
+ }
}
if (flush) {
@@ -354,26 +236,33 @@ vm_pageout_setup(
vm_page_lock_queues();
vm_stat.pageouts++;
if (m->laundry) {
+
/*
- * vm_pageout_scan is telling us to put this page
- * at the front of the inactive queue, so it will
- * be immediately paged out to the default pager.
+ * The caller is telling us that it is going to
+ * immediately double page this page to the default
+ * pager.
*/
assert(!old_object->internal);
m->laundry = FALSE;
-
- queue_enter_first(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
} else if (old_object->internal) {
m->laundry = TRUE;
vm_page_laundry_count++;
vm_page_wire(m);
- } else
+ } else {
vm_page_activate(m);
+
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout >= 0) {
+ vm_page_external_pagedout++;
+ }
+ }
vm_page_unlock_queues();
/*
@@ -504,49 +393,35 @@ vm_pageout_page(
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
- * It returns with vm_page_queue_free_lock held and
- * vm_page_free_wanted == 0.
+ *
+ * Return TRUE if the pageout daemon is done for now, FALSE otherwise,
+ * in which case should_wait indicates whether the pageout daemon
+ * should wait to allow pagers to keep up.
+ *
+ * It returns with vm_page_queue_free_lock held.
*/
-void vm_pageout_scan(void)
+boolean_t vm_pageout_scan(boolean_t *should_wait)
{
- unsigned int burst_count;
- unsigned int want_pages;
+ boolean_t done;
/*
- * We want to gradually dribble pages from the active queue
- * to the inactive queue. If we let the inactive queue get
- * very small, and then suddenly dump many pages into it,
- * those pages won't get a sufficient chance to be referenced
- * before we start taking them from the inactive queue.
- *
- * We must limit the rate at which we send pages to the pagers.
- * data_write messages consume memory, for message buffers and
- * for map-copy objects. If we get too far ahead of the pagers,
- * we can potentially run out of memory.
- *
- * We can use the laundry count to limit directly the number
- * of pages outstanding to the default pager. A similar
- * strategy for external pagers doesn't work, because
- * external pagers don't have to deallocate the pages sent them,
- * and because we might have to send pages to external pagers
- * even if they aren't processing writes. So we also
- * use a burst count to limit writes to external pagers.
- *
- * When memory is very tight, we can't rely on external pagers to
- * clean pages. They probably aren't running, because they
- * aren't vm-privileged. If we kept sending dirty pages to them,
- * we could exhaust the free list. However, we can't just ignore
- * pages belonging to external objects, because there might be no
- * pages belonging to internal objects. Hence, we get the page
- * into an internal object and then immediately double-page it,
- * sending it to the default pager.
- *
- * slab_collect should be last, because the other operations
- * might return memory to caches. When we pause we use
- * vm_pageout_scan_continue as our continuation, so we will
- * reenter vm_pageout_scan periodically and attempt to reclaim
- * internal memory even if we never reach vm_page_free_target.
+ * Try balancing pages among segments first, since this
+ * may be enough to resume unprivileged allocations.
+ */
+
+ /* This function returns with vm_page_queue_free_lock held */
+ done = vm_page_balance();
+
+ if (done) {
+ return TRUE;
+ }
+
+ simple_unlock(&vm_page_queue_free_lock);
+
+ /*
+ * Balancing is not enough. Shrink caches and scan pages
+ * for eviction.
*/
stack_collect();
@@ -555,428 +430,65 @@ void vm_pageout_scan(void)
if (0) /* XXX: pcb_collect doesn't do anything yet, so it is
pointless to call consider_thread_collect. */
consider_thread_collect();
- slab_collect();
-
- for (burst_count = 0;;) {
- vm_page_t m;
- vm_object_t object;
- unsigned long free_count;
-
- /*
- * Recalculate vm_page_inactivate_target.
- */
-
- vm_page_lock_queues();
- vm_page_inactive_target =
- VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
- vm_page_inactive_count);
-
- /*
- * Move pages from active to inactive.
- */
-
- while ((vm_page_inactive_count < vm_page_inactive_target) &&
- !queue_empty(&vm_page_queue_active)) {
- vm_object_t obj;
-
- vm_pageout_active++;
- m = (vm_page_t) queue_first(&vm_page_queue_active);
- assert(m->active && !m->inactive);
-
- obj = m->object;
- if (!vm_object_lock_try(obj)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_active, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_page_lock_queues();
- continue;
- }
-
- /*
- * If the page is busy, then we pull it
- * off the active queue and leave it alone.
- */
-
- if (m->busy) {
- vm_object_unlock(obj);
- queue_remove(&vm_page_queue_active, m,
- vm_page_t, pageq);
- m->active = FALSE;
- vm_page_active_count--;
- continue;
- }
-
- /*
- * Deactivate the page while holding the object
- * locked, so we know the page is still not busy.
- * This should prevent races between pmap_enter
- * and pmap_clear_reference. The page might be
- * absent or fictitious, but vm_page_deactivate
- * can handle that.
- */
-
- vm_page_deactivate(m);
- vm_object_unlock(obj);
- }
-
- /*
- * We are done if we have met our targets *and*
- * nobody is still waiting for a page.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- free_count = vm_page_mem_free();
- if ((free_count >= vm_page_free_target) &&
- (vm_page_external_count <= vm_page_external_target) &&
- (vm_page_free_wanted == 0)) {
- vm_page_unlock_queues();
- break;
- }
- want_pages = ((free_count < vm_page_free_target) ||
- vm_page_free_wanted);
- simple_unlock(&vm_page_queue_free_lock);
-
- /*
- * Sometimes we have to pause:
- * 1) No inactive pages - nothing to do.
- * 2) Flow control - wait for pagers to catch up.
- * 3) Extremely low memory - sending out dirty pages
- * consumes memory. We don't take the risk of doing
- * this if the default pager already has work to do.
- */
- pause:
- if (queue_empty(&vm_page_queue_inactive) ||
- (burst_count >= vm_pageout_burst_max) ||
- (vm_page_laundry_count >= vm_pageout_burst_max) ||
- ((free_count < vm_pageout_reserved_really) &&
- (vm_page_laundry_count > 0))) {
- unsigned int pages, msecs;
-
- /*
- * vm_pageout_burst_wait is msecs/page.
- * If there is nothing for us to do, we wait
- * at least vm_pageout_empty_wait msecs.
- */
-
- if (vm_page_laundry_count > burst_count)
- pages = vm_page_laundry_count;
- else
- pages = burst_count;
- msecs = pages * vm_pageout_burst_wait;
-
- if (queue_empty(&vm_page_queue_inactive) &&
- (msecs < vm_pageout_empty_wait))
- msecs = vm_pageout_empty_wait;
- vm_page_unlock_queues();
-
- thread_will_wait_with_timeout(current_thread(), msecs);
- counter(c_vm_pageout_scan_block++);
- thread_block(vm_pageout_scan_continue);
- call_continuation(vm_pageout_scan_continue);
- /*NOTREACHED*/
- }
-
- vm_pageout_inactive++;
-
- /* Find a page we are interested in paging out. If we
- need pages, then we'll page anything out; otherwise
- we only page out external pages. */
- m = (vm_page_t) queue_first (&vm_page_queue_inactive);
- while (1)
- {
- assert (!m->active && m->inactive);
- if (want_pages || m->external)
- break;
-
- m = (vm_page_t) queue_next (&m->pageq);
- if (!m)
- goto pause;
- }
-
- object = m->object;
-
- /*
- * Try to lock object; since we've got the
- * page queues lock, we can only try for this one.
- */
- if (!vm_object_lock_try(object)) {
- /*
- * Move page to end and continue.
- */
-
- queue_remove(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter(&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_nolock++;
- continue;
- }
-
- /*
- * Remove the page from the inactive list.
- */
-
- queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
-
- if (m->busy || !object->alive) {
- /*
- * Somebody is already playing with this page.
- * Leave it off the pageout queues.
- */
-
- vm_page_unlock_queues();
- vm_object_unlock(object);
- vm_pageout_inactive_busy++;
- continue;
- }
-
- /*
- * If it's absent, we can reclaim the page.
- */
-
- if (want_pages && m->absent) {
- vm_pageout_inactive_absent++;
- reclaim_page:
- vm_page_free(m);
- vm_page_unlock_queues();
-
- if (vm_object_collectable(object))
- vm_object_collect(object);
- else
- vm_object_unlock(object);
-
- continue;
- }
-
- /*
- * If it's being used, reactivate.
- * (Fictitious pages are either busy or absent.)
- */
-
- assert(!m->fictitious);
- if (m->reference || pmap_is_referenced(m->phys_addr)) {
- vm_object_unlock(object);
- vm_page_activate(m);
- vm_stat.reactivations++;
- current_task()->reactivations++;
- vm_page_unlock_queues();
- vm_pageout_inactive_used++;
- continue;
- }
-
- /*
- * Eliminate all mappings.
- */
-
- m->busy = TRUE;
- pmap_page_protect(m->phys_addr, VM_PROT_NONE);
- if (!m->dirty)
- m->dirty = pmap_is_modified(m->phys_addr);
-
- if (m->external) {
- /* Figure out if we still care about this
- page in the limit of externally managed pages.
- Clean pages don't actually cause system hosage,
- so it's ok to stop considering them as
- "consumers" of memory. */
- if (m->dirty && !m->extcounted) {
- m->extcounted = TRUE;
- vm_page_external_count++;
- } else if (!m->dirty && m->extcounted) {
- m->extcounted = FALSE;
- vm_page_external_count--;
- }
- }
-
- /* If we don't actually need more memory, and the page
- is not dirty, put it on the tail of the inactive queue
- and move on to the next page. */
- if (!want_pages && !m->dirty) {
- queue_remove (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- queue_enter (&vm_page_queue_inactive, m,
- vm_page_t, pageq);
- vm_page_unlock_queues();
- vm_pageout_inactive_cleaned_external++;
- continue;
- }
-
- /*
- * If it's clean and not precious, we can free the page.
- */
-
- if (!m->dirty && !m->precious) {
- vm_pageout_inactive_clean++;
- goto reclaim_page;
- }
-
- /*
- * If we are very low on memory, then we can't
- * rely on an external pager to clean a dirty page,
- * because external pagers are not vm-privileged.
- *
- * The laundry bit tells vm_pageout_setup to
- * put the page back at the front of the inactive
- * queue instead of activating the page. Hence,
- * we will pick the page up again immediately and
- * resend it to the default pager.
- */
-
- assert(!m->laundry);
- if ((free_count < vm_pageout_reserved_internal) &&
- !object->internal) {
- m->laundry = TRUE;
- vm_pageout_inactive_double++;
- }
- vm_page_unlock_queues();
-
- /*
- * If there is no memory object for the page, create
- * one and hand it to the default pager.
- * [First try to collapse, so we don't create
- * one unnecessarily.]
- */
-
- if (!object->pager_initialized)
- vm_object_collapse(object);
- if (!object->pager_initialized)
- vm_object_pager_create(object);
- if (!object->pager_initialized)
- panic("vm_pageout_scan");
-
- vm_pageout_inactive_dirty++;
- vm_pageout_page(m, FALSE, TRUE); /* flush it */
- vm_object_unlock(object);
- burst_count++;
- }
-}
-
-void vm_pageout_scan_continue(void)
-{
/*
- * We just paused to let the pagers catch up.
- * If vm_page_laundry_count is still high,
- * then we aren't waiting long enough.
- * If we have paused some vm_pageout_pause_max times without
- * adjusting vm_pageout_burst_wait, it might be too big,
- * so we decrease it.
+ * slab_collect should be last, because the other operations
+ * might return memory to caches.
*/
+ slab_collect();
- vm_page_lock_queues();
- if (vm_page_laundry_count > vm_pageout_burst_min) {
- vm_pageout_burst_wait++;
- vm_pageout_pause_count = 0;
- } else if (++vm_pageout_pause_count > vm_pageout_pause_max) {
- vm_pageout_burst_wait = (vm_pageout_burst_wait * 3) / 4;
- if (vm_pageout_burst_wait < 1)
- vm_pageout_burst_wait = 1;
- vm_pageout_pause_count = 0;
- }
- vm_page_unlock_queues();
+ vm_page_refill_inactive();
- vm_pageout_continue();
- /*NOTREACHED*/
+ /* This function returns with vm_page_queue_free_lock held */
+ return vm_page_evict(should_wait);
}
-/*
- * vm_pageout is the high level pageout daemon.
- */
-
-void vm_pageout_continue(void)
+void vm_pageout(void)
{
- /*
- * The pageout daemon is never done, so loop forever.
- * We should call vm_pageout_scan at least once each
- * time we are woken, even if vm_page_free_wanted is
- * zero, to check vm_page_free_target and
- * vm_page_inactive_target.
- */
+ boolean_t done, should_wait;
+
+ current_thread()->vm_privilege = 1;
+ stack_privilege(current_thread());
+ thread_set_own_priority(0);
for (;;) {
- vm_pageout_scan();
+ done = vm_pageout_scan(&should_wait);
/* we hold vm_page_queue_free_lock now */
- assert(vm_page_free_wanted == 0);
- assert_wait(&vm_page_free_wanted, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- counter(c_vm_pageout_block++);
- thread_block(vm_pageout_continue);
+ if (done) {
+ thread_sleep(&vm_pageout_requested,
+ simple_lock_addr(vm_page_queue_free_lock),
+ FALSE);
+ } else if (should_wait) {
+ assert_wait(&vm_pageout_continue, FALSE);
+ thread_set_timeout(500);
+ simple_unlock(&vm_page_queue_free_lock);
+ thread_block(NULL);
+ } else {
+ simple_unlock(&vm_page_queue_free_lock);
+ }
}
}
-void vm_pageout(void)
+/*
+ * Start pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_start(void)
{
- unsigned long free_after_reserve;
-
- current_thread()->vm_privilege = TRUE;
- stack_privilege(current_thread());
- thread_set_own_priority(0);
-
- /*
- * Initialize some paging parameters.
- */
-
- if (vm_pageout_burst_max == 0)
- vm_pageout_burst_max = VM_PAGEOUT_BURST_MAX;
-
- if (vm_pageout_burst_min == 0)
- vm_pageout_burst_min = VM_PAGEOUT_BURST_MIN;
-
- if (vm_pageout_burst_wait == 0)
- vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
-
- if (vm_pageout_empty_wait == 0)
- vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
-
- if (vm_page_free_reserved == 0)
- vm_page_free_reserved = VM_PAGE_FREE_RESERVED;
-
- if (vm_pageout_pause_max == 0)
- vm_pageout_pause_max = VM_PAGEOUT_PAUSE_MAX;
-
- if (vm_pageout_reserved_internal == 0)
- vm_pageout_reserved_internal =
- VM_PAGEOUT_RESERVED_INTERNAL(vm_page_free_reserved);
-
- if (vm_pageout_reserved_really == 0)
- vm_pageout_reserved_really =
- VM_PAGEOUT_RESERVED_REALLY(vm_page_free_reserved);
-
- free_after_reserve = vm_page_mem_free() - vm_page_free_reserved;
-
- if (vm_page_external_limit == 0)
- vm_page_external_limit =
- VM_PAGE_EXTERNAL_LIMIT (free_after_reserve);
-
- if (vm_page_external_target == 0)
- vm_page_external_target =
- VM_PAGE_EXTERNAL_TARGET (free_after_reserve);
-
- if (vm_page_free_min == 0)
- vm_page_free_min = vm_page_free_reserved +
- VM_PAGE_FREE_MIN(free_after_reserve);
-
- if (vm_page_free_target == 0)
- vm_page_free_target = vm_page_free_reserved +
- VM_PAGE_FREE_TARGET(free_after_reserve);
-
- if (vm_page_free_target < vm_page_free_min + 5)
- vm_page_free_target = vm_page_free_min + 5;
+ if (!current_thread())
+ return;
- /*
- * vm_pageout_scan will set vm_page_inactive_target.
- */
+ thread_wakeup_one(&vm_pageout_requested);
+}
- vm_pageout_continue();
- /*NOTREACHED*/
+/*
+ * Resume pageout
+ *
+ * The free page queue lock must be held before calling this function.
+ */
+void vm_pageout_resume(void)
+{
+ thread_wakeup_one(&vm_pageout_continue);
}
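The two entry points above replace the old vm_page_free_wanted handshake: vm_pageout_start() wakes the daemon sleeping on vm_pageout_requested, while vm_pageout_resume() wakes it from the throttled wait on vm_pageout_continue (vm_page_release() in vm_resident.c calls it once the laundry or external-pagedout counters drain). A hypothetical caller sketch, not actual gnumach code, showing how an allocation path might kick the daemon:

    /* Hypothetical example; the free page queue lock must be held
     * around vm_pageout_start(), as its comment requires. */
    static vm_page_t
    example_grab_or_kick_pageout(void)
    {
            vm_page_t m = vm_page_grab();

            if (m == VM_PAGE_NULL) {
                    simple_lock(&vm_page_queue_free_lock);
                    vm_pageout_start();
                    simple_unlock(&vm_page_queue_free_lock);
            }

            return m;
    }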
diff --git a/vm/vm_pageout.h b/vm/vm_pageout.h
index ea6cfaf..6ddd821 100644
--- a/vm/vm_pageout.h
+++ b/vm/vm_pageout.h
@@ -46,8 +46,8 @@ extern void vm_pageout_page(vm_page_t, boolean_t, boolean_t);
extern void vm_pageout(void) __attribute__((noreturn));
-extern void vm_pageout_continue(void) __attribute__((noreturn));
+extern void vm_pageout_start(void);
-extern void vm_pageout_scan_continue(void) __attribute__((noreturn));
+extern void vm_pageout_resume(void);
#endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index ed867f5..e3e34dc 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -39,6 +39,7 @@
#include <mach/vm_prot.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/list.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -92,25 +93,16 @@ typedef struct {
} vm_page_bucket_t;
vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
-unsigned int vm_page_bucket_count = 0; /* How big is array? */
-unsigned int vm_page_hash_mask; /* Mask for hash function */
+unsigned long vm_page_bucket_count = 0; /* How big is array? */
+unsigned long vm_page_hash_mask; /* Mask for hash function */
-vm_page_t vm_page_queue_fictitious;
+static struct list vm_page_queue_fictitious;
decl_simple_lock_data(,vm_page_queue_free_lock)
-unsigned int vm_page_free_wanted;
int vm_page_fictitious_count;
-int vm_page_external_count;
int vm_object_external_count;
int vm_object_external_pages;
/*
- * This variable isn't directly used. It's merely a placeholder for the
- * address used to synchronize threads waiting for pages to become
- * available. The real value is returned by vm_page_free_mem().
- */
-unsigned int vm_page_free_avail;
-
-/*
* Occasionally, the virtual memory system uses
* resident page structures that do not refer to
* real pages, for example to leave a page with
@@ -127,7 +119,7 @@ struct kmem_cache vm_page_cache;
* For debugging, this should be a strange value
* that the pmap module can recognize in assertions.
*/
-vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
+phys_addr_t vm_page_fictitious_addr = (phys_addr_t) -1;
/*
* Resident page structures are also chained on
@@ -136,8 +128,6 @@ vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
* defined here, but are shared by the pageout
* module.
*/
-queue_head_t vm_page_queue_active;
-queue_head_t vm_page_queue_inactive;
decl_simple_lock_data(,vm_page_queue_lock)
int vm_page_active_count;
int vm_page_inactive_count;
@@ -149,12 +139,8 @@ int vm_page_wire_count;
* (done here in vm_page_alloc) can trigger the
* pageout daemon.
*/
-int vm_page_free_target = 0;
-int vm_page_free_min = 0;
-int vm_page_inactive_target = 0;
-int vm_page_free_reserved = 0;
int vm_page_laundry_count = 0;
-int vm_page_external_limit = 0;
+int vm_page_external_pagedout = 0;
/*
@@ -192,11 +178,7 @@ void vm_page_bootstrap(
simple_lock_init(&vm_page_queue_free_lock);
simple_lock_init(&vm_page_queue_lock);
- vm_page_queue_fictitious = VM_PAGE_NULL;
- queue_init(&vm_page_queue_active);
- queue_init(&vm_page_queue_inactive);
-
- vm_page_free_wanted = 0;
+ list_init(&vm_page_queue_fictitious);
/*
* Allocate (and initialize) the virtual-to-physical
@@ -209,7 +191,7 @@ void vm_page_bootstrap(
*/
if (vm_page_bucket_count == 0) {
- unsigned int npages = pmap_free_pages();
+ unsigned long npages = vm_page_table_size();
vm_page_bucket_count = 1;
while (vm_page_bucket_count < npages)
@@ -331,6 +313,7 @@ void vm_page_module_init(void)
* table and object list.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_insert(
@@ -342,6 +325,14 @@ void vm_page_insert(
VM_PAGE_CHECK(mem);
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->external);
+
+ if (!object->internal) {
+ mem->external = TRUE;
+ vm_object_external_pages++;
+ }
+
if (mem->tabled)
panic("vm_page_insert");
@@ -390,10 +381,6 @@ void vm_page_insert(
vm_page_deactivate(last_mem);
}
object->last_alloc = offset;
-
- if (!object->internal) {
- vm_object_external_pages++;
- }
}
/*
@@ -404,6 +391,7 @@ void vm_page_insert(
* and we don't do deactivate-behind.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_replace(
@@ -415,6 +403,14 @@ void vm_page_replace(
VM_PAGE_CHECK(mem);
+ assert(!mem->active && !mem->inactive);
+ assert(!mem->external);
+
+ if (!object->internal) {
+ mem->external = TRUE;
+ vm_object_external_pages++;
+ }
+
if (mem->tabled)
panic("vm_page_replace");
@@ -446,8 +442,10 @@ void vm_page_replace(
listq);
m->tabled = FALSE;
object->resident_page_count--;
+ VM_PAGE_QUEUES_REMOVE(m);
- if (!object->internal) {
+ if (m->external) {
+ m->external = FALSE;
vm_object_external_pages--;
}
@@ -483,19 +481,16 @@ void vm_page_replace(
object->resident_page_count++;
assert(object->resident_page_count != 0);
-
- if (!object->internal) {
- vm_object_external_pages++;
- }
}
/*
* vm_page_remove: [ internal use only ]
*
* Removes the given mem entry from the object/offset-page
- * table and the object page list.
+ * table, the object page list, and the page queues.
*
* The object and page must be locked.
+ * The free page queue must not be locked.
*/
void vm_page_remove(
@@ -543,7 +538,10 @@ void vm_page_remove(
mem->tabled = FALSE;
- if (!mem->object->internal) {
+ VM_PAGE_QUEUES_REMOVE(mem);
+
+ if (mem->external) {
+ mem->external = FALSE;
vm_object_external_pages--;
}
}
@@ -656,11 +654,15 @@ vm_page_t vm_page_grab_fictitious(void)
vm_page_t m;
simple_lock(&vm_page_queue_free_lock);
- m = vm_page_queue_fictitious;
- if (m != VM_PAGE_NULL) {
- vm_page_fictitious_count--;
- vm_page_queue_fictitious = (vm_page_t) m->pageq.next;
+ if (list_empty(&vm_page_queue_fictitious)) {
+ m = VM_PAGE_NULL;
+ } else {
+ m = list_first_entry(&vm_page_queue_fictitious,
+ struct vm_page, node);
+ assert(m->fictitious);
+ list_remove(&m->node);
m->free = FALSE;
+ vm_page_fictitious_count--;
}
simple_unlock(&vm_page_queue_free_lock);
@@ -680,8 +682,7 @@ static void vm_page_release_fictitious(
if (m->free)
panic("vm_page_release_fictitious");
m->free = TRUE;
- m->pageq.next = (queue_entry_t) vm_page_queue_fictitious;
- vm_page_queue_fictitious = m;
+ list_insert_head(&vm_page_queue_fictitious, &m->node);
vm_page_fictitious_count++;
simple_unlock(&vm_page_queue_free_lock);
}
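The fictitious page queue now uses the generic <kern/list.h> intrusive list instead of chaining pages through pageq. A toy sketch of that idiom, using only the calls that appear in this diff and assuming `node' is the struct list member embedded in struct vm_page:

    /* Toy example of the <kern/list.h> usage shown above. */
    static struct list example_queue;

    static void
    example_list_usage(struct vm_page *m)
    {
            list_init(&example_queue);
            list_insert_head(&example_queue, &m->node);

            if (!list_empty(&example_queue)) {
                    struct vm_page *first;

                    first = list_first_entry(&example_queue,
                                             struct vm_page, node);
                    list_remove(&first->node);
            }
    }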
@@ -720,9 +721,7 @@ void vm_page_more_fictitious(void)
* The object referenced by *MP must be locked.
*/
-boolean_t vm_page_convert(
- struct vm_page **mp,
- boolean_t external)
+boolean_t vm_page_convert(struct vm_page **mp)
{
struct vm_page *real_m, *fict_m;
vm_object_t object;
@@ -735,7 +734,7 @@ boolean_t vm_page_convert(
assert(!fict_m->active);
assert(!fict_m->inactive);
- real_m = vm_page_grab(external);
+ real_m = vm_page_grab();
if (real_m == VM_PAGE_NULL)
return FALSE;
@@ -766,27 +765,21 @@ boolean_t vm_page_convert(
* Returns VM_PAGE_NULL if the free list is too small.
*/
-vm_page_t vm_page_grab(
- boolean_t external)
+vm_page_t vm_page_grab(void)
{
vm_page_t mem;
simple_lock(&vm_page_queue_free_lock);
/*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
+ * XXX Mach has many modules that merely assume memory is
+ * directly mapped in kernel space. Instead of updating all
+ * users, we assume those which need specific physical memory
+ * properties will wire down their pages, either because
+ * they can't be paged (not part of an object), or with
+ * explicit VM calls. The strategy is then to let memory
+ * pressure balance the physical segments with pageable pages.
*/
-
- if (((vm_page_mem_free() < vm_page_free_reserved)
- || (external
- && (vm_page_external_count > vm_page_external_limit)))
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
mem = vm_page_alloc_pa(0, VM_PAGE_SEL_DIRECTMAP, VM_PT_KERNEL);
if (mem == NULL) {
@@ -794,35 +787,15 @@ vm_page_t vm_page_grab(
return NULL;
}
- if (external)
- vm_page_external_count++;
-
mem->free = FALSE;
- mem->extcounted = mem->external = external;
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
-vm_offset_t vm_page_grab_phys_addr(void)
+phys_addr_t vm_page_grab_phys_addr(void)
{
- vm_page_t p = vm_page_grab(FALSE);
+ vm_page_t p = vm_page_grab();
if (p == VM_PAGE_NULL)
return -1;
else
@@ -835,8 +808,9 @@ vm_offset_t vm_page_grab_phys_addr(void)
* Return a page to the free list.
*/
-static void vm_page_release(
+void vm_page_release(
vm_page_t mem,
+ boolean_t laundry,
boolean_t external)
{
simple_lock(&vm_page_queue_free_lock);
@@ -844,33 +818,28 @@ static void vm_page_release(
panic("vm_page_release");
mem->free = TRUE;
vm_page_free_pa(mem, 0);
- if (external)
- vm_page_external_count--;
+ if (laundry) {
+ vm_page_laundry_count--;
- /*
- * Check if we should wake up someone waiting for page.
- * But don't bother waking them unless they can allocate.
- *
- * We wakeup only one thread, to prevent starvation.
- * Because the scheduling system handles wait queues FIFO,
- * if we wakeup all waiting threads, one greedy thread
- * can starve multiple niceguy threads. When the threads
- * all wakeup, the greedy threads runs first, grabs the page,
- * and waits for another page. It will be the first to run
- * when the next page is freed.
- *
- * However, there is a slight danger here.
- * The thread we wake might not use the free page.
- * Then the other threads could wait indefinitely
- * while the page goes unused. To forestall this,
- * the pageout daemon will keep making free pages
- * as long as vm_page_free_wanted is non-zero.
- */
+ if (vm_page_laundry_count == 0) {
+ vm_pageout_resume();
+ }
+ }
+ if (external) {
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
+ /*
+ * If vm_page_external_pagedout is negative,
+ * the pageout daemon isn't expecting to be
+ * notified.
+ */
+
+ if (vm_page_external_pagedout > 0) {
+ vm_page_external_pagedout--;
+ }
+
+ if (vm_page_external_pagedout == 0) {
+ vm_pageout_resume();
+ }
}
simple_unlock(&vm_page_queue_free_lock);
@@ -895,18 +864,6 @@ vm_page_t vm_page_grab_contig(
simple_lock(&vm_page_queue_free_lock);
- /*
- * Only let privileged threads (involved in pageout)
- * dip into the reserved pool or exceed the limit
- * for externally-managed pages.
- */
-
- if (((vm_page_mem_free() - nr_pages) <= vm_page_free_reserved)
- && !current_thread()->vm_privilege) {
- simple_unlock(&vm_page_queue_free_lock);
- return VM_PAGE_NULL;
- }
-
/* TODO Allow caller to pass type */
mem = vm_page_alloc_pa(order, selector, VM_PT_KERNEL);
@@ -917,27 +874,10 @@ vm_page_t vm_page_grab_contig(
for (i = 0; i < nr_pages; i++) {
mem[i].free = FALSE;
- mem[i].extcounted = mem[i].external = 0;
}
simple_unlock(&vm_page_queue_free_lock);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
-
- if ((vm_page_mem_free() < vm_page_free_min) ||
- ((vm_page_mem_free() < vm_page_free_target) &&
- (vm_page_inactive_count < vm_page_inactive_target)))
- thread_wakeup((event_t) &vm_page_free_wanted);
-
return mem;
}
@@ -965,53 +905,10 @@ void vm_page_free_contig(vm_page_t mem, vm_size_t size)
vm_page_free_pa(mem, order);
- if ((vm_page_free_wanted > 0) &&
- (vm_page_mem_free() >= vm_page_free_reserved)) {
- vm_page_free_wanted--;
- thread_wakeup_one((event_t) &vm_page_free_avail);
- }
-
simple_unlock(&vm_page_queue_free_lock);
}
/*
- * vm_page_wait:
- *
- * Wait for a page to become available.
- * If there are plenty of free pages, then we don't sleep.
- */
-
-void vm_page_wait(
- void (*continuation)(void))
-{
-
- /*
- * We can't use vm_page_free_reserved to make this
- * determination. Consider: some thread might
- * need to allocate two pages. The first allocation
- * succeeds, the second fails. After the first page is freed,
- * a call to vm_page_wait must really block.
- */
-
- simple_lock(&vm_page_queue_free_lock);
- if ((vm_page_mem_free() < vm_page_free_target)
- || (vm_page_external_count > vm_page_external_limit)) {
- if (vm_page_free_wanted++ == 0)
- thread_wakeup((event_t)&vm_page_free_wanted);
- assert_wait((event_t)&vm_page_free_avail, FALSE);
- simple_unlock(&vm_page_queue_free_lock);
- if (continuation != 0) {
- counter(c_vm_page_wait_block_user++);
- thread_block(continuation);
- } else {
- counter(c_vm_page_wait_block_kernel++);
- thread_block((void (*)(void)) 0);
- }
- } else
- simple_unlock(&vm_page_queue_free_lock);
-}
-
-/*
* vm_page_alloc:
*
* Allocate and return a memory cell associated
@@ -1026,7 +923,7 @@ vm_page_t vm_page_alloc(
{
vm_page_t mem;
- mem = vm_page_grab(!object->internal);
+ mem = vm_page_grab();
if (mem == VM_PAGE_NULL)
return VM_PAGE_NULL;
@@ -1051,9 +948,11 @@ void vm_page_free(
if (mem->free)
panic("vm_page_free");
- if (mem->tabled)
+ if (mem->tabled) {
vm_page_remove(mem);
- VM_PAGE_QUEUES_REMOVE(mem);
+ }
+
+ assert(!mem->active && !mem->inactive);
if (mem->wire_count != 0) {
if (!mem->private && !mem->fictitious)
@@ -1061,11 +960,6 @@ void vm_page_free(
mem->wire_count = 0;
}
- if (mem->laundry) {
- vm_page_laundry_count--;
- mem->laundry = FALSE;
- }
-
PAGE_WAKEUP_DONE(mem);
if (mem->absent)
@@ -1082,117 +976,10 @@ void vm_page_free(
mem->fictitious = TRUE;
vm_page_release_fictitious(mem);
} else {
- int external = mem->external && mem->extcounted;
+ boolean_t laundry = mem->laundry;
+ boolean_t external = mem->external;
vm_page_init(mem);
- vm_page_release(mem, external);
- }
-}
-
-/*
- * vm_page_wire:
- *
- * Mark this page as wired down by yet
- * another map, removing it from paging queues
- * as necessary.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_wire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (mem->wire_count == 0) {
- VM_PAGE_QUEUES_REMOVE(mem);
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count++;
- }
- mem->wire_count++;
-}
-
-/*
- * vm_page_unwire:
- *
- * Release one wiring of this page, potentially
- * enabling it to be paged again.
- *
- * The page's object and the page queues must be locked.
- */
-void vm_page_unwire(
- vm_page_t mem)
-{
- VM_PAGE_CHECK(mem);
-
- if (--mem->wire_count == 0) {
- queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
- vm_page_active_count++;
- mem->active = TRUE;
- if (!mem->private && !mem->fictitious)
- vm_page_wire_count--;
- }
-}
-
-/*
- * vm_page_deactivate:
- *
- * Returns the given page to the inactive list,
- * indicating that no physical maps have access
- * to this page. [Used by the physical mapping system.]
- *
- * The page queues must be locked.
- */
-void vm_page_deactivate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- /*
- * This page is no longer very interesting. If it was
- * interesting (active or inactive/referenced), then we
- * clear the reference bit and (re)enter it in the
- * inactive queue. Note wired pages should not have
- * their reference bit cleared.
- */
-
- if (m->active || (m->inactive && m->reference)) {
- if (!m->fictitious && !m->absent)
- pmap_clear_reference(m->phys_addr);
- m->reference = FALSE;
- VM_PAGE_QUEUES_REMOVE(m);
- }
- if (m->wire_count == 0 && !m->inactive) {
- queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
- m->inactive = TRUE;
- vm_page_inactive_count++;
- }
-}
-
-/*
- * vm_page_activate:
- *
- * Put the specified page on the active list (if appropriate).
- *
- * The page queues must be locked.
- */
-
-void vm_page_activate(
- vm_page_t m)
-{
- VM_PAGE_CHECK(m);
-
- if (m->inactive) {
- queue_remove(&vm_page_queue_inactive, m, vm_page_t,
- pageq);
- vm_page_inactive_count--;
- m->inactive = FALSE;
- }
- if (m->wire_count == 0) {
- if (m->active)
- panic("vm_page_activate: already active");
-
- queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
- m->active = TRUE;
- vm_page_active_count++;
+ vm_page_release(mem, laundry, external);
}
}
diff --git a/xen/block.c b/xen/block.c
index 7d6f1ca..2568598 100644
--- a/xen/block.c
+++ b/xen/block.c
@@ -457,7 +457,7 @@ device_read (void *d, ipc_port_t reply_port,
/* Allocate pages. */
while (alloc_offset < offset + len)
{
- while ((m = vm_page_grab (FALSE)) == 0)
+ while ((m = vm_page_grab ()) == 0)
VM_PAGE_WAIT (0);
assert (! m->active && ! m->inactive);
m->busy = TRUE;
@@ -568,7 +568,10 @@ device_write(void *d, ipc_port_t reply_port,
{
io_return_t err = 0;
vm_map_copy_t copy = (vm_map_copy_t) data;
- vm_offset_t aligned_buffer = 0;
+ vm_offset_t buffer = 0;
+ char *map_data;
+ vm_offset_t map_addr;
+ vm_size_t map_size;
unsigned copy_npages = atop(round_page(count));
vm_offset_t phys_addrs[copy_npages];
struct block_data *bd = d;
@@ -576,6 +579,7 @@ device_write(void *d, ipc_port_t reply_port,
grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned reqn, size;
unsigned i, nbpages, j;
+ kern_return_t kr;
if (!(bd->mode & D_WRITE))
return D_READ_ONLY;
@@ -591,31 +595,24 @@ device_write(void *d, ipc_port_t reply_port,
if (count > copy->size)
return D_INVALID_SIZE;
- if (copy->type != VM_MAP_COPY_PAGE_LIST || copy->offset & PAGE_MASK) {
- /* Unaligned write. Has to copy data before passing it to the backend. */
- kern_return_t kr;
- vm_offset_t buffer;
-
- kr = kmem_alloc(device_io_map, &aligned_buffer, count);
- if (kr != KERN_SUCCESS)
- return kr;
-
- kr = vm_map_copyout(device_io_map, &buffer, vm_map_copy_copy(copy));
- if (kr != KERN_SUCCESS) {
- kmem_free(device_io_map, aligned_buffer, count);
- return kr;
- }
-
- memcpy((void*) aligned_buffer, (void*) buffer, count);
+ /* XXX The underlying physical pages of the mapping could be highmem,
+ for which drivers require the use of a bounce buffer. */
+ kr = kmem_alloc(device_io_map, &buffer, count);
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+ &map_addr, &map_size, copy, count);
+ if (kr != KERN_SUCCESS) {
+ kmem_free(device_io_map, buffer, count);
+ return kr;
+ }
- vm_deallocate (device_io_map, buffer, count);
+ memcpy((void *)buffer, map_data, count);
+ kmem_io_map_deallocate(device_io_map, map_addr, map_size);
- for (i = 0; i < copy_npages; i++)
- phys_addrs[i] = kvtophys(aligned_buffer + ptoa(i));
- } else {
- for (i = 0; i < copy_npages; i++)
- phys_addrs[i] = copy->cpy_page_list[i]->phys_addr;
- }
+ for (i = 0; i < copy_npages; i++)
+ phys_addrs[i] = kvtophys(buffer + ptoa(i));
for (i=0; i<copy_npages; i+=nbpages) {
@@ -674,8 +671,8 @@ device_write(void *d, ipc_port_t reply_port,
}
}
- if (aligned_buffer)
- kmem_free(device_io_map, aligned_buffer, count);
+ if (buffer)
+ kmem_free(device_io_map, buffer, count);
vm_map_copy_discard (copy);
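For clarity, here is the bounce-buffer pattern that the Xen block and net device_write paths now share, condensed into one hypothetical helper. The helper name is made up; the calls and error paths are the ones used in the diff above.

    /* Condensed sketch of the shared bounce-buffer sequence. */
    static kern_return_t
    example_bounce_copyin(vm_map_copy_t copy, vm_size_t count,
                          vm_offset_t *bufferp)
    {
            vm_offset_t buffer, map_addr;
            vm_size_t map_size;
            char *map_data;
            kern_return_t kr;

            /* Kernel-mapped bounce buffer that can safely be handed
             * to the backend, even if the copy's pages are highmem. */
            kr = kmem_alloc(device_io_map, &buffer, count);
            if (kr != KERN_SUCCESS)
                    return kr;

            /* Temporarily map the copy object, copy it in, unmap it. */
            kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
                                     &map_addr, &map_size, copy, count);
            if (kr != KERN_SUCCESS) {
                    kmem_free(device_io_map, buffer, count);
                    return kr;
            }

            memcpy((void *)buffer, map_data, count);
            kmem_io_map_deallocate(device_io_map, map_addr, map_size);

            *bufferp = buffer;      /* caller kmem_free()s it when done */
            return KERN_SUCCESS;
    }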
diff --git a/xen/console.c b/xen/console.c
index 9e8db8f..aed63cb 100644
--- a/xen/console.c
+++ b/xen/console.c
@@ -116,8 +116,8 @@ static void hypcnintr(int unit, spl_t spl, void *ret_addr, void *regs) {
mb();
console->in_cons++;
#if MACH_KDB
- if (c == (char)'£') {
- printf("£ pressed\n");
+ if (c == (char)0xA3) {
+ printf("pound pressed\n");
kdb_kintr();
continue;
}
diff --git a/xen/net.c b/xen/net.c
index 5a3f90d..1112138 100644
--- a/xen/net.c
+++ b/xen/net.c
@@ -29,6 +29,7 @@
#include <device/net_io.h>
#include <device/device_reply.user.h>
#include <device/device_emul.h>
+#include <device/ds_routines.h>
#include <intel/pmap.h>
#include <xen/public/io/netif.h>
#include <xen/public/memory.h>
@@ -601,9 +602,11 @@ device_write(void *d, ipc_port_t reply_port,
struct ifnet *ifp = &nd->ifnet;
netif_tx_request_t *req;
unsigned reqn;
- vm_offset_t offset;
- vm_page_t m;
- vm_size_t size;
+ vm_offset_t buffer;
+ char *map_data;
+ vm_offset_t map_addr;
+ vm_size_t map_size;
+ kern_return_t kr;
/* The maximum that we can handle. */
assert(ifp->if_header_size + ifp->if_mtu <= PAGE_SIZE);
@@ -617,26 +620,21 @@ device_write(void *d, ipc_port_t reply_port,
assert(copy->cpy_npages <= 2);
assert(copy->cpy_npages >= 1);
- offset = copy->offset & PAGE_MASK;
- if (paranoia || copy->cpy_npages == 2) {
- /* have to copy :/ */
- while ((m = vm_page_grab(FALSE)) == 0)
- VM_PAGE_WAIT (0);
- assert (! m->active && ! m->inactive);
- m->busy = TRUE;
+ kr = kmem_alloc(device_io_map, &buffer, count);
- if (copy->cpy_npages == 1)
- size = count;
- else
- size = PAGE_SIZE - offset;
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ kr = kmem_io_map_copyout(device_io_map, (vm_offset_t *)&map_data,
+ &map_addr, &map_size, copy, count);
- memcpy((void*)phystokv(m->phys_addr), (void*)phystokv(copy->cpy_page_list[0]->phys_addr + offset), size);
- if (copy->cpy_npages == 2)
- memcpy((void*)phystokv(m->phys_addr + size), (void*)phystokv(copy->cpy_page_list[1]->phys_addr), count - size);
+ if (kr != KERN_SUCCESS) {
+ kmem_free(device_io_map, buffer, count);
+ return kr;
+ }
- offset = 0;
- } else
- m = copy->cpy_page_list[0];
+ memcpy((void *)buffer, map_data, count);
+ kmem_io_map_deallocate(device_io_map, map_addr, map_size);
/* allocate a request */
spl_t spl = splimp();
@@ -653,8 +651,8 @@ device_write(void *d, ipc_port_t reply_port,
(void) splx(spl);
req = RING_GET_REQUEST(&nd->tx, reqn);
- req->gref = gref = hyp_grant_give(nd->domid, atop(m->phys_addr), 1);
- req->offset = offset;
+ req->gref = gref = hyp_grant_give(nd->domid, atop(kvtophys(buffer)), 1);
+ req->offset = 0;
req->flags = 0;
req->id = gref;
req->size = count;
@@ -685,11 +683,11 @@ device_write(void *d, ipc_port_t reply_port,
/* Suitable for Ethernet only. */
header = (struct ether_header *) (net_kmsg (kmsg)->header);
packet = (struct packet_header *) (net_kmsg (kmsg)->packet);
- memcpy (header, (void*)phystokv(m->phys_addr + offset), sizeof (struct ether_header));
+ memcpy (header, (void*)buffer, sizeof (struct ether_header));
/* packet is prefixed with a struct packet_header,
see include/device/net_status.h. */
- memcpy (packet + 1, (void*)phystokv(m->phys_addr + offset + sizeof (struct ether_header)),
+ memcpy (packet + 1, (void*)buffer + sizeof (struct ether_header),
count - sizeof (struct ether_header));
packet->length = count - sizeof (struct ether_header)
+ sizeof (struct packet_header);
@@ -702,8 +700,7 @@ device_write(void *d, ipc_port_t reply_port,
}
}
- if (paranoia || copy->cpy_npages == 2)
- VM_PAGE_FREE(m);
+ kmem_free(device_io_map, buffer, count);
vm_map_copy_discard (copy);
--
Alioth's /usr/local/bin/git-commit-notice on
/srv/git.debian.org/git/pkg-hurd/gnumach.git