ignore/
walk.rs

1use std::cmp;
2use std::ffi::OsStr;
3use std::fmt;
4use std::fs::{self, FileType, Metadata};
5use std::io;
6use std::path::{Path, PathBuf};
7use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
8use std::sync::Arc;
9use std::thread;
10use std::time::Duration;
11use std::vec;
12
13use channel;
14use same_file::Handle;
15use walkdir::{self, WalkDir};
16
17use dir::{Ignore, IgnoreBuilder};
18use gitignore::GitignoreBuilder;
19use overrides::Override;
20use types::Types;
21use {Error, PartialErrorBuilder};
22
23/// A directory entry with a possible error attached.
24///
25/// The error typically refers to a problem parsing ignore files in a
26/// particular directory.
27#[derive(Clone, Debug)]
28pub struct DirEntry {
29    dent: DirEntryInner,
30    err: Option<Error>,
31}
32
33impl DirEntry {
34    /// The full path that this entry represents.
35    pub fn path(&self) -> &Path {
36        self.dent.path()
37    }
38
39    /// The full path that this entry represents.
40    /// Analogous to [`path`], but moves ownership of the path.
41    ///
42    /// [`path`]: struct.DirEntry.html#method.path
43    pub fn into_path(self) -> PathBuf {
44        self.dent.into_path()
45    }
46
47    /// Whether this entry corresponds to a symbolic link or not.
48    pub fn path_is_symlink(&self) -> bool {
49        self.dent.path_is_symlink()
50    }
51
52    /// Returns true if and only if this entry corresponds to stdin.
53    ///
54    /// i.e., The entry has depth 0 and its file name is `-`.
55    pub fn is_stdin(&self) -> bool {
56        self.dent.is_stdin()
57    }
58
59    /// Return the metadata for the file that this entry points to.
60    pub fn metadata(&self) -> Result<Metadata, Error> {
61        self.dent.metadata()
62    }
63
64    /// Return the file type for the file that this entry points to.
65    ///
66    /// This entry doesn't have a file type if it corresponds to stdin.
67    pub fn file_type(&self) -> Option<FileType> {
68        self.dent.file_type()
69    }
70
71    /// Return the file name of this entry.
72    ///
73    /// If this entry has no file name (e.g., `/`), then the full path is
74    /// returned.
75    pub fn file_name(&self) -> &OsStr {
76        self.dent.file_name()
77    }
78
79    /// Returns the depth at which this entry was created relative to the root.
80    pub fn depth(&self) -> usize {
81        self.dent.depth()
82    }
83
84    /// Returns the underlying inode number if one exists.
85    ///
86    /// If this entry doesn't have an inode number, then `None` is returned.
87    #[cfg(unix)]
88    pub fn ino(&self) -> Option<u64> {
89        self.dent.ino()
90    }
91
92    /// Returns an error, if one exists, associated with processing this entry.
93    ///
94    /// An example of an error is one that occurred while parsing an ignore
95    /// file. Errors related to traversing a directory tree itself are reported
96    /// as part of yielding the directory entry, and not with this method.
97    pub fn error(&self) -> Option<&Error> {
98        self.err.as_ref()
99    }
100
101    /// Returns true if and only if this entry points to a directory.
102    pub(crate) fn is_dir(&self) -> bool {
103        self.dent.is_dir()
104    }
105
106    fn new_stdin() -> DirEntry {
107        DirEntry {
108            dent: DirEntryInner::Stdin,
109            err: None,
110        }
111    }
112
113    fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
114        DirEntry {
115            dent: DirEntryInner::Walkdir(dent),
116            err: err,
117        }
118    }
119
120    fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
121        DirEntry {
122            dent: DirEntryInner::Raw(dent),
123            err: err,
124        }
125    }
126}
127
128/// DirEntryInner is the implementation of DirEntry.
129///
130/// It specifically represents three distinct sources of directory entries:
131///
132/// 1. From the walkdir crate.
133/// 2. Special entries that represent things like stdin.
134/// 3. From a path.
135///
136/// Specifically, (3) has to essentially re-create the DirEntry implementation
137/// from WalkDir.
138#[derive(Clone, Debug)]
139enum DirEntryInner {
140    Stdin,
141    Walkdir(walkdir::DirEntry),
142    Raw(DirEntryRaw),
143}
144
145impl DirEntryInner {
146    fn path(&self) -> &Path {
147        use self::DirEntryInner::*;
148        match *self {
149            Stdin => Path::new("<stdin>"),
150            Walkdir(ref x) => x.path(),
151            Raw(ref x) => x.path(),
152        }
153    }
154
155    fn into_path(self) -> PathBuf {
156        use self::DirEntryInner::*;
157        match self {
158            Stdin => PathBuf::from("<stdin>"),
159            Walkdir(x) => x.into_path(),
160            Raw(x) => x.into_path(),
161        }
162    }
163
164    fn path_is_symlink(&self) -> bool {
165        use self::DirEntryInner::*;
166        match *self {
167            Stdin => false,
168            Walkdir(ref x) => x.path_is_symlink(),
169            Raw(ref x) => x.path_is_symlink(),
170        }
171    }
172
173    fn is_stdin(&self) -> bool {
174        match *self {
175            DirEntryInner::Stdin => true,
176            _ => false,
177        }
178    }
179
180    fn metadata(&self) -> Result<Metadata, Error> {
181        use self::DirEntryInner::*;
182        match *self {
183            Stdin => {
184                let err = Error::Io(io::Error::new(
185                    io::ErrorKind::Other,
186                    "<stdin> has no metadata",
187                ));
188                Err(err.with_path("<stdin>"))
189            }
190            Walkdir(ref x) => x
191                .metadata()
192                .map_err(|err| Error::Io(io::Error::from(err)).with_path(x.path())),
193            Raw(ref x) => x.metadata(),
194        }
195    }
196
197    fn file_type(&self) -> Option<FileType> {
198        use self::DirEntryInner::*;
199        match *self {
200            Stdin => None,
201            Walkdir(ref x) => Some(x.file_type()),
202            Raw(ref x) => Some(x.file_type()),
203        }
204    }
205
206    fn file_name(&self) -> &OsStr {
207        use self::DirEntryInner::*;
208        match *self {
209            Stdin => OsStr::new("<stdin>"),
210            Walkdir(ref x) => x.file_name(),
211            Raw(ref x) => x.file_name(),
212        }
213    }
214
215    fn depth(&self) -> usize {
216        use self::DirEntryInner::*;
217        match *self {
218            Stdin => 0,
219            Walkdir(ref x) => x.depth(),
220            Raw(ref x) => x.depth(),
221        }
222    }
223
224    #[cfg(unix)]
225    fn ino(&self) -> Option<u64> {
226        use self::DirEntryInner::*;
227        use walkdir::DirEntryExt;
228        match *self {
229            Stdin => None,
230            Walkdir(ref x) => Some(x.ino()),
231            Raw(ref x) => Some(x.ino()),
232        }
233    }
234
235    /// Returns true if and only if this entry points to a directory.
236    fn is_dir(&self) -> bool {
237        self.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
238    }
239}
240
241/// DirEntryRaw is essentially copied from the walkdir crate so that we can
242/// build `DirEntry`s from whole cloth in the parallel iterator.
243#[derive(Clone)]
244struct DirEntryRaw {
245    /// The path as reported by the `fs::ReadDir` iterator (even if it's a
246    /// symbolic link).
247    path: PathBuf,
248    /// The file type. Necessary for recursive iteration, so store it.
249    ty: FileType,
250    /// Is set when this entry was created from a symbolic link and the user
251    /// expects the iterator to follow symbolic links.
252    follow_link: bool,
253    /// The depth at which this entry was generated relative to the root.
254    depth: usize,
255    /// The underlying inode number (Unix only).
256    #[cfg(unix)]
257    ino: u64,
258    /// The underlying metadata (Windows only). We store this on Windows
259    /// because this comes for free while reading a directory.
260    #[cfg(windows)]
261    metadata: fs::Metadata,
262}
263
264impl fmt::Debug for DirEntryRaw {
265    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
266        // Leaving out FileType because it doesn't have a debug impl
267        // in Rust 1.9. We could add it if we really wanted to by manually
268        // querying each possibly file type. Meh. ---AG
269        f.debug_struct("DirEntryRaw")
270            .field("path", &self.path)
271            .field("follow_link", &self.follow_link)
272            .field("depth", &self.depth)
273            .finish()
274    }
275}
276
277impl DirEntryRaw {
278    fn path(&self) -> &Path {
279        &self.path
280    }
281
282    fn into_path(self) -> PathBuf {
283        self.path
284    }
285
286    fn path_is_symlink(&self) -> bool {
287        self.ty.is_symlink() || self.follow_link
288    }
289
290    fn metadata(&self) -> Result<Metadata, Error> {
291        self.metadata_internal()
292    }
293
294    #[cfg(windows)]
295    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
296        if self.follow_link {
297            fs::metadata(&self.path)
298        } else {
299            Ok(self.metadata.clone())
300        }
301        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
302    }
303
304    #[cfg(not(windows))]
305    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
306        if self.follow_link {
307            fs::metadata(&self.path)
308        } else {
309            fs::symlink_metadata(&self.path)
310        }
311        .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
312    }
313
314    fn file_type(&self) -> FileType {
315        self.ty
316    }
317
318    fn file_name(&self) -> &OsStr {
319        self.path
320            .file_name()
321            .unwrap_or_else(|| self.path.as_os_str())
322    }
323
324    fn depth(&self) -> usize {
325        self.depth
326    }
327
328    #[cfg(unix)]
329    fn ino(&self) -> u64 {
330        self.ino
331    }
332
333    fn from_entry(depth: usize, ent: &fs::DirEntry) -> Result<DirEntryRaw, Error> {
334        let ty = ent.file_type().map_err(|err| {
335            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
336            Error::WithDepth {
337                depth: depth,
338                err: Box::new(err),
339            }
340        })?;
341        DirEntryRaw::from_entry_os(depth, ent, ty)
342    }
343
344    #[cfg(windows)]
345    fn from_entry_os(
346        depth: usize,
347        ent: &fs::DirEntry,
348        ty: fs::FileType,
349    ) -> Result<DirEntryRaw, Error> {
350        let md = ent.metadata().map_err(|err| {
351            let err = Error::Io(io::Error::from(err)).with_path(ent.path());
352            Error::WithDepth {
353                depth: depth,
354                err: Box::new(err),
355            }
356        })?;
357        Ok(DirEntryRaw {
358            path: ent.path(),
359            ty: ty,
360            follow_link: false,
361            depth: depth,
362            metadata: md,
363        })
364    }
365
366    #[cfg(unix)]
367    fn from_entry_os(
368        depth: usize,
369        ent: &fs::DirEntry,
370        ty: fs::FileType,
371    ) -> Result<DirEntryRaw, Error> {
372        use std::os::unix::fs::DirEntryExt;
373
374        Ok(DirEntryRaw {
375            path: ent.path(),
376            ty: ty,
377            follow_link: false,
378            depth: depth,
379            ino: ent.ino(),
380        })
381    }
382
383    // Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32).
384    #[cfg(not(any(windows, unix)))]
385    fn from_entry_os(
386        depth: usize,
387        ent: &fs::DirEntry,
388        ty: fs::FileType,
389    ) -> Result<DirEntryRaw, Error> {
390        Err(Error::Io(io::Error::new(
391            io::ErrorKind::Other,
392            "unsupported platform",
393        )))
394    }
395
396    #[cfg(windows)]
397    fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
398        let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
399        Ok(DirEntryRaw {
400            path: pb,
401            ty: md.file_type(),
402            follow_link: link,
403            depth: depth,
404            metadata: md,
405        })
406    }
407
408    #[cfg(unix)]
409    fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
410        use std::os::unix::fs::MetadataExt;
411
412        let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
413        Ok(DirEntryRaw {
414            path: pb,
415            ty: md.file_type(),
416            follow_link: link,
417            depth: depth,
418            ino: md.ino(),
419        })
420    }
421
422    // Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32).
423    #[cfg(not(any(windows, unix)))]
424    fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
425        Err(Error::Io(io::Error::new(
426            io::ErrorKind::Other,
427            "unsupported platform",
428        )))
429    }
430}
431
432/// WalkBuilder builds a recursive directory iterator.
433///
434/// The builder supports a large number of configurable options. This includes
435/// specific glob overrides, file type matching, toggling whether hidden
436/// files are ignored or not, and of course, support for respecting gitignore
437/// files.
438///
439/// By default, all ignore files found are respected. This includes `.ignore`,
440/// `.gitignore`, `.git/info/exclude` and even your global gitignore
441/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
442///
443/// Some standard recursive directory options are also supported, such as
444/// limiting the recursive depth or whether to follow symbolic links (disabled
445/// by default).
446///
447/// # Ignore rules
448///
449/// There are many rules that influence whether a particular file or directory
450/// is skipped by this iterator. Those rules are documented here. Note that
451/// the rules assume a default configuration.
452///
453/// * First, glob overrides are checked. If a path matches a glob override,
454/// then matching stops. The path is then only skipped if the glob that matched
455/// the path is an ignore glob. (An override glob is a whitelist glob unless it
456/// starts with a `!`, in which case it is an ignore glob.)
457/// * Second, ignore files are checked. Ignore files currently only come from
458/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
459/// global gitignore file), plain `.ignore` files, which have the same format
460/// as gitignore files, or explicitly added ignore files. The precedence order
461/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
462/// finally explicitly added ignore files. Note that precedence between
463/// different types of ignore files is not impacted by the directory hierarchy;
464/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
465/// level, more nested ignore files have a higher precedence than less nested
466/// ignore files.
467/// * Third, if the previous step yields an ignore match, then all matching
468/// is stopped and the path is skipped. If it yields a whitelist match, then
469/// matching continues. A whitelist match can be overridden by a later matcher.
470/// * Fourth, unless the path is a directory, the file type matcher is run on
471/// the path. As above, if it yields an ignore match, then all matching is
472/// stopped and the path is skipped. If it yields a whitelist match, then
473/// matching continues.
474/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
475/// path is skipped.
476/// * Sixth, unless the path is a directory, the size of the file is compared
477/// against the max filesize limit. If it exceeds the limit, it is skipped.
478/// * Seventh, if the path has made it this far then it is yielded in the
479/// iterator.
480#[derive(Clone)]
481pub struct WalkBuilder {
482    paths: Vec<PathBuf>,
483    ig_builder: IgnoreBuilder,
484    max_depth: Option<usize>,
485    max_filesize: Option<u64>,
486    follow_links: bool,
487    same_file_system: bool,
488    sorter: Option<Sorter>,
489    threads: usize,
490    skip: Option<Arc<Handle>>,
491}
492
493#[derive(Clone)]
494enum Sorter {
495    ByName(Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>),
496    ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
497}
498
499impl fmt::Debug for WalkBuilder {
500    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
501        f.debug_struct("WalkBuilder")
502            .field("paths", &self.paths)
503            .field("ig_builder", &self.ig_builder)
504            .field("max_depth", &self.max_depth)
505            .field("max_filesize", &self.max_filesize)
506            .field("follow_links", &self.follow_links)
507            .field("threads", &self.threads)
508            .field("skip", &self.skip)
509            .finish()
510    }
511}
512
513impl WalkBuilder {
514    /// Create a new builder for a recursive directory iterator for the
515    /// directory given.
516    ///
517    /// Note that if you want to traverse multiple different directories, it
518    /// is better to call `add` on this builder than to create multiple
519    /// `Walk` values.
520    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
521        WalkBuilder {
522            paths: vec![path.as_ref().to_path_buf()],
523            ig_builder: IgnoreBuilder::new(),
524            max_depth: None,
525            max_filesize: None,
526            follow_links: false,
527            same_file_system: false,
528            sorter: None,
529            threads: 0,
530            skip: None,
531        }
532    }
533
534    /// Build a new `Walk` iterator.
535    pub fn build(&self) -> Walk {
536        let follow_links = self.follow_links;
537        let max_depth = self.max_depth;
538        let sorter = self.sorter.clone();
539        let its = self
540            .paths
541            .iter()
542            .map(move |p| {
543                if p == Path::new("-") {
544                    (p.to_path_buf(), None)
545                } else {
546                    let mut wd = WalkDir::new(p);
547                    wd = wd.follow_links(follow_links || p.is_file());
548                    wd = wd.same_file_system(self.same_file_system);
549                    if let Some(max_depth) = max_depth {
550                        wd = wd.max_depth(max_depth);
551                    }
552                    if let Some(ref sorter) = sorter {
553                        match sorter.clone() {
554                            Sorter::ByName(cmp) => {
555                                wd = wd.sort_by(move |a, b| cmp(a.file_name(), b.file_name()));
556                            }
557                            Sorter::ByPath(cmp) => {
558                                wd = wd.sort_by(move |a, b| cmp(a.path(), b.path()));
559                            }
560                        }
561                    }
562                    (p.to_path_buf(), Some(WalkEventIter::from(wd)))
563                }
564            })
565            .collect::<Vec<_>>()
566            .into_iter();
567        let ig_root = self.ig_builder.build();
568        Walk {
569            its: its,
570            it: None,
571            ig_root: ig_root.clone(),
572            ig: ig_root.clone(),
573            max_filesize: self.max_filesize,
574            skip: self.skip.clone(),
575        }
576    }
577
578    /// Build a new `WalkParallel` iterator.
579    ///
580    /// Note that this *doesn't* return something that implements `Iterator`.
581    /// Instead, the returned value must be run with a closure. e.g.,
582    /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
583    pub fn build_parallel(&self) -> WalkParallel {
584        WalkParallel {
585            paths: self.paths.clone().into_iter(),
586            ig_root: self.ig_builder.build(),
587            max_depth: self.max_depth,
588            max_filesize: self.max_filesize,
589            follow_links: self.follow_links,
590            same_file_system: self.same_file_system,
591            threads: self.threads,
592            skip: self.skip.clone(),
593        }
594    }
595
596    /// Add a file path to the iterator.
597    ///
598    /// Each additional file path added is traversed recursively. This should
599    /// be preferred over building multiple `Walk` iterators since this
600    /// enables reusing resources across iteration.
601    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
602        self.paths.push(path.as_ref().to_path_buf());
603        self
604    }
605
606    /// The maximum depth to recurse.
607    ///
608    /// The default, `None`, imposes no depth restriction.
609    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
610        self.max_depth = depth;
611        self
612    }
613
614    /// Whether to follow symbolic links or not.
615    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
616        self.follow_links = yes;
617        self
618    }
619
620    /// Whether to ignore files above the specified limit.
621    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
622        self.max_filesize = filesize;
623        self
624    }
625
626    /// The number of threads to use for traversal.
627    ///
628    /// Note that this only has an effect when using `build_parallel`.
629    ///
630    /// The default setting is `0`, which chooses the number of threads
631    /// automatically using heuristics.
632    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
633        self.threads = n;
634        self
635    }
636
637    /// Add a global ignore file to the matcher.
638    ///
639    /// This has lower precedence than all other sources of ignore rules.
640    ///
641    /// If there was a problem adding the ignore file, then an error is
642    /// returned. Note that the error may indicate *partial* failure. For
643    /// example, if an ignore file contains an invalid glob, all other globs
644    /// are still applied.
645    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
646        let mut builder = GitignoreBuilder::new("");
647        let mut errs = PartialErrorBuilder::default();
648        errs.maybe_push(builder.add(path));
649        match builder.build() {
650            Ok(gi) => {
651                self.ig_builder.add_ignore(gi);
652            }
653            Err(err) => {
654                errs.push(err);
655            }
656        }
657        errs.into_error_option()
658    }
659
660    /// Add a custom ignore file name
661    ///
662    /// These ignore files have higher precedence than all other ignore files.
663    ///
664    /// When specifying multiple names, earlier names have lower precedence than
665    /// later names.
666    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
667        &mut self,
668        file_name: S,
669    ) -> &mut WalkBuilder {
670        self.ig_builder.add_custom_ignore_filename(file_name);
671        self
672    }
673
674    /// Add an override matcher.
675    ///
676    /// By default, no override matcher is used.
677    ///
678    /// This overrides any previous setting.
679    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
680        self.ig_builder.overrides(overrides);
681        self
682    }
683
684    /// Add a file type matcher.
685    ///
686    /// By default, no file type matcher is used.
687    ///
688    /// This overrides any previous setting.
689    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
690        self.ig_builder.types(types);
691        self
692    }
693
694    /// Enables all the standard ignore filters.
695    ///
696    /// This toggles, as a group, all the filters that are enabled by default:
697    ///
698    /// - [hidden()](#method.hidden)
699    /// - [parents()](#method.parents)
700    /// - [ignore()](#method.ignore)
701    /// - [git_ignore()](#method.git_ignore)
702    /// - [git_global()](#method.git_global)
703    /// - [git_exclude()](#method.git_exclude)
704    ///
705    /// They may still be toggled individually after calling this function.
706    ///
707    /// This is (by definition) enabled by default.
708    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
709        self.hidden(yes)
710            .parents(yes)
711            .ignore(yes)
712            .git_ignore(yes)
713            .git_global(yes)
714            .git_exclude(yes)
715    }
716
717    /// Enables ignoring hidden files.
718    ///
719    /// This is enabled by default.
720    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
721        self.ig_builder.hidden(yes);
722        self
723    }
724
725    /// Enables reading ignore files from parent directories.
726    ///
727    /// If this is enabled, then .gitignore files in parent directories of each
728    /// file path given are respected. Otherwise, they are ignored.
729    ///
730    /// This is enabled by default.
731    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
732        self.ig_builder.parents(yes);
733        self
734    }
735
736    /// Enables reading `.ignore` files.
737    ///
738    /// `.ignore` files have the same semantics as `gitignore` files and are
739    /// supported by search tools such as ripgrep and The Silver Searcher.
740    ///
741    /// This is enabled by default.
742    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
743        self.ig_builder.ignore(yes);
744        self
745    }
746
747    /// Enables reading a global gitignore file, whose path is specified in
748    /// git's `core.excludesFile` config option.
749    ///
750    /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
751    /// does not exist or does not specify `core.excludesFile`, then
752    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
753    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
754    ///
755    /// This is enabled by default.
756    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
757        self.ig_builder.git_global(yes);
758        self
759    }
760
761    /// Enables reading `.gitignore` files.
762    ///
763    /// `.gitignore` files have match semantics as described in the `gitignore`
764    /// man page.
765    ///
766    /// This is enabled by default.
767    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
768        self.ig_builder.git_ignore(yes);
769        self
770    }
771
772    /// Enables reading `.git/info/exclude` files.
773    ///
774    /// `.git/info/exclude` files have match semantics as described in the
775    /// `gitignore` man page.
776    ///
777    /// This is enabled by default.
778    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
779        self.ig_builder.git_exclude(yes);
780        self
781    }
782
783    /// Process ignore files case insensitively
784    ///
785    /// This is disabled by default.
786    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
787        self.ig_builder.ignore_case_insensitive(yes);
788        self
789    }
790
791    /// Set a function for sorting directory entries by their path.
792    ///
793    /// If a compare function is set, the resulting iterator will return all
794    /// paths in sorted order. The compare function will be called to compare
795    /// entries from the same directory.
796    ///
797    /// This is like `sort_by_file_name`, except the comparator accepts
798    /// a `&Path` instead of the base file name, which permits it to sort by
799    /// more criteria.
800    ///
801    /// This method will override any previous sorter set by this method or
802    /// by `sort_by_file_name`.
803    ///
804    /// Note that this is not used in the parallel iterator.
805    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
806    where
807        F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
808    {
809        self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
810        self
811    }
812
813    /// Set a function for sorting directory entries by file name.
814    ///
815    /// If a compare function is set, the resulting iterator will return all
816    /// paths in sorted order. The compare function will be called to compare
817    /// names from entries from the same directory using only the name of the
818    /// entry.
819    ///
820    /// This method will override any previous sorter set by this method or
821    /// by `sort_by_file_path`.
822    ///
823    /// Note that this is not used in the parallel iterator.
824    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
825    where
826        F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
827    {
828        self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
829        self
830    }
831
832    /// Do not cross file system boundaries.
833    ///
834    /// When this option is enabled, directory traversal will not descend into
835    /// directories that are on a different file system from the root path.
836    ///
837    /// Currently, this option is only supported on Unix and Windows. If this
838    /// option is used on an unsupported platform, then directory traversal
839    /// will immediately return an error and will not yield any entries.
840    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
841        self.same_file_system = yes;
842        self
843    }
844
845    /// Do not yield directory entries that are believed to correspond to
846    /// stdout.
847    ///
848    /// This is useful when a command is invoked via shell redirection to a
849    /// file that is also being read. For example, `grep -r foo ./ > results`
850    /// might end up trying to search `results` even though it is also writing
851    /// to it, which could cause an unbounded feedback loop. Setting this
852    /// option prevents this from happening by skipping over the `results`
853    /// file.
854    ///
855    /// This is disabled by default.
856    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
857        if yes {
858            self.skip = stdout_handle().map(Arc::new);
859        } else {
860            self.skip = None;
861        }
862        self
863    }
864}
865
866/// Walk is a recursive directory iterator over file paths in one or more
867/// directories.
868///
869/// Only file and directory paths matching the rules are returned. By default,
870/// ignore files like `.gitignore` are respected. The precise matching rules
871/// and precedence is explained in the documentation for `WalkBuilder`.
872pub struct Walk {
873    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
874    it: Option<WalkEventIter>,
875    ig_root: Ignore,
876    ig: Ignore,
877    max_filesize: Option<u64>,
878    skip: Option<Arc<Handle>>,
879}
880
881impl Walk {
882    /// Creates a new recursive directory iterator for the file path given.
883    ///
884    /// Note that this uses default settings, which include respecting
885    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
886    /// instead.
887    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
888        WalkBuilder::new(path).build()
889    }
890
891    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
892        if ent.depth() == 0 {
893            return Ok(false);
894        }
895
896        if let Some(ref stdout) = self.skip {
897            if path_equals(ent, stdout)? {
898                return Ok(true);
899            }
900        }
901        if should_skip_entry(&self.ig, ent) {
902            return Ok(true);
903        }
904        if self.max_filesize.is_some() && !ent.is_dir() {
905            return Ok(skip_filesize(
906                self.max_filesize.unwrap(),
907                ent.path(),
908                &ent.metadata().ok(),
909            ));
910        }
911        Ok(false)
912    }
913}
914
915impl Iterator for Walk {
916    type Item = Result<DirEntry, Error>;
917
918    #[inline(always)]
919    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
920        loop {
921            let ev = match self.it.as_mut().and_then(|it| it.next()) {
922                Some(ev) => ev,
923                None => {
924                    match self.its.next() {
925                        None => return None,
926                        Some((_, None)) => {
927                            return Some(Ok(DirEntry::new_stdin()));
928                        }
929                        Some((path, Some(it))) => {
930                            self.it = Some(it);
931                            if path.is_dir() {
932                                let (ig, err) = self.ig_root.add_parents(path);
933                                self.ig = ig;
934                                if let Some(err) = err {
935                                    return Some(Err(err));
936                                }
937                            } else {
938                                self.ig = self.ig_root.clone();
939                            }
940                        }
941                    }
942                    continue;
943                }
944            };
945            match ev {
946                Err(err) => {
947                    return Some(Err(Error::from_walkdir(err)));
948                }
949                Ok(WalkEvent::Exit) => {
950                    self.ig = self.ig.parent().unwrap();
951                }
952                Ok(WalkEvent::Dir(ent)) => {
953                    let mut ent = DirEntry::new_walkdir(ent, None);
954                    let should_skip = match self.skip_entry(&ent) {
955                        Err(err) => return Some(Err(err)),
956                        Ok(should_skip) => should_skip,
957                    };
958                    if should_skip {
959                        self.it.as_mut().unwrap().it.skip_current_dir();
960                        // Still need to push this on the stack because
961                        // we'll get a WalkEvent::Exit event for this dir.
962                        // We don't care if it errors though.
963                        let (igtmp, _) = self.ig.add_child(ent.path());
964                        self.ig = igtmp;
965                        continue;
966                    }
967                    let (igtmp, err) = self.ig.add_child(ent.path());
968                    self.ig = igtmp;
969                    ent.err = err;
970                    return Some(Ok(ent));
971                }
972                Ok(WalkEvent::File(ent)) => {
973                    let ent = DirEntry::new_walkdir(ent, None);
974                    let should_skip = match self.skip_entry(&ent) {
975                        Err(err) => return Some(Err(err)),
976                        Ok(should_skip) => should_skip,
977                    };
978                    if should_skip {
979                        continue;
980                    }
981                    return Some(Ok(ent));
982                }
983            }
984        }
985    }
986}
987
988/// WalkEventIter transforms a WalkDir iterator into an iterator that more
989/// accurately describes the directory tree. Namely, it emits events that are
990/// one of three types: directory, file or "exit." An "exit" event means that
991/// the entire contents of a directory have been enumerated.
992struct WalkEventIter {
993    depth: usize,
994    it: walkdir::IntoIter,
995    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
996}
997
998#[derive(Debug)]
999enum WalkEvent {
1000    Dir(walkdir::DirEntry),
1001    File(walkdir::DirEntry),
1002    Exit,
1003}
1004
1005impl From<WalkDir> for WalkEventIter {
1006    fn from(it: WalkDir) -> WalkEventIter {
1007        WalkEventIter {
1008            depth: 0,
1009            it: it.into_iter(),
1010            next: None,
1011        }
1012    }
1013}
1014
1015impl Iterator for WalkEventIter {
1016    type Item = walkdir::Result<WalkEvent>;
1017
1018    #[inline(always)]
1019    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
1020        let dent = self.next.take().or_else(|| self.it.next());
1021        let depth = match dent {
1022            None => 0,
1023            Some(Ok(ref dent)) => dent.depth(),
1024            Some(Err(ref err)) => err.depth(),
1025        };
1026        if depth < self.depth {
1027            self.depth -= 1;
1028            self.next = dent;
1029            return Some(Ok(WalkEvent::Exit));
1030        }
1031        self.depth = depth;
1032        match dent {
1033            None => None,
1034            Some(Err(err)) => Some(Err(err)),
1035            Some(Ok(dent)) => {
1036                if dent.file_type().is_dir() {
1037                    self.depth += 1;
1038                    Some(Ok(WalkEvent::Dir(dent)))
1039                } else {
1040                    Some(Ok(WalkEvent::File(dent)))
1041                }
1042            }
1043        }
1044    }
1045}
1046
1047/// WalkState is used in the parallel recursive directory iterator to indicate
1048/// whether walking should continue as normal, skip descending into a
1049/// particular directory or quit the walk entirely.
1050#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1051pub enum WalkState {
1052    /// Continue walking as normal.
1053    Continue,
1054    /// If the directory entry given is a directory, don't descend into it.
1055    /// In all other cases, this has no effect.
1056    Skip,
1057    /// Quit the entire iterator as soon as possible.
1058    ///
1059    /// Note that this is an inherently asynchronous action. It is possible
1060    /// for more entries to be yielded even after instructing the iterator
1061    /// to quit.
1062    Quit,
1063}
1064
1065impl WalkState {
1066    fn is_quit(&self) -> bool {
1067        *self == WalkState::Quit
1068    }
1069}
1070
1071/// WalkParallel is a parallel recursive directory iterator over files paths
1072/// in one or more directories.
1073///
1074/// Only file and directory paths matching the rules are returned. By default,
1075/// ignore files like `.gitignore` are respected. The precise matching rules
1076/// and precedence is explained in the documentation for `WalkBuilder`.
1077///
1078/// Unlike `Walk`, this uses multiple threads for traversing a directory.
1079pub struct WalkParallel {
1080    paths: vec::IntoIter<PathBuf>,
1081    ig_root: Ignore,
1082    max_filesize: Option<u64>,
1083    max_depth: Option<usize>,
1084    follow_links: bool,
1085    same_file_system: bool,
1086    threads: usize,
1087    skip: Option<Arc<Handle>>,
1088}
1089
1090impl WalkParallel {
1091    /// Execute the parallel recursive directory iterator. `mkf` is called
1092    /// for each thread used for iteration. The function produced by `mkf`
1093    /// is then in turn called for each visited file path.
1094    pub fn run<F>(self, mut mkf: F)
1095    where
1096        F: FnMut() -> Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static>,
1097    {
1098        let mut f = mkf();
1099        let threads = self.threads();
1100        // TODO: Figure out how to use a bounded channel here. With an
1101        // unbounded channel, the workers can run away and fill up memory
1102        // with all of the file paths. But a bounded channel doesn't work since
1103        // our producers are also are consumers, so they end up getting stuck.
1104        //
1105        // We probably need to rethink parallel traversal completely to fix
1106        // this. The best case scenario would be finding a way to use rayon
1107        // to do this.
1108        let (tx, rx) = channel::unbounded();
1109        let mut any_work = false;
1110        // Send the initial set of root paths to the pool of workers.
1111        // Note that we only send directories. For files, we send to them the
1112        // callback directly.
1113        for path in self.paths {
1114            let (dent, root_device) = if path == Path::new("-") {
1115                (DirEntry::new_stdin(), None)
1116            } else {
1117                let root_device = if !self.same_file_system {
1118                    None
1119                } else {
1120                    match device_num(&path) {
1121                        Ok(root_device) => Some(root_device),
1122                        Err(err) => {
1123                            let err = Error::Io(err).with_path(path);
1124                            if f(Err(err)).is_quit() {
1125                                return;
1126                            }
1127                            continue;
1128                        }
1129                    }
1130                };
1131                match DirEntryRaw::from_path(0, path, false) {
1132                    Ok(dent) => (DirEntry::new_raw(dent, None), root_device),
1133                    Err(err) => {
1134                        if f(Err(err)).is_quit() {
1135                            return;
1136                        }
1137                        continue;
1138                    }
1139                }
1140            };
1141            tx.send(Message::Work(Work {
1142                dent: dent,
1143                ignore: self.ig_root.clone(),
1144                root_device: root_device,
1145            }))
1146            .unwrap();
1147            any_work = true;
1148        }
1149        // ... but there's no need to start workers if we don't need them.
1150        if !any_work {
1151            return;
1152        }
1153        // Create the workers and then wait for them to finish.
1154        let num_waiting = Arc::new(AtomicUsize::new(0));
1155        let num_quitting = Arc::new(AtomicUsize::new(0));
1156        let quit_now = Arc::new(AtomicBool::new(false));
1157        let mut handles = vec![];
1158        for _ in 0..threads {
1159            let worker = Worker {
1160                f: mkf(),
1161                tx: tx.clone(),
1162                rx: rx.clone(),
1163                quit_now: quit_now.clone(),
1164                is_waiting: false,
1165                is_quitting: false,
1166                num_waiting: num_waiting.clone(),
1167                num_quitting: num_quitting.clone(),
1168                threads: threads,
1169                max_depth: self.max_depth,
1170                max_filesize: self.max_filesize,
1171                follow_links: self.follow_links,
1172                skip: self.skip.clone(),
1173            };
1174            handles.push(thread::spawn(|| worker.run()));
1175        }
1176        drop(tx);
1177        drop(rx);
1178        for handle in handles {
1179            handle.join().unwrap();
1180        }
1181    }
1182
1183    fn threads(&self) -> usize {
1184        if self.threads == 0 {
1185            2
1186        } else {
1187            self.threads
1188        }
1189    }
1190}
1191
1192/// Message is the set of instructions that a worker knows how to process.
1193enum Message {
1194    /// A work item corresponds to a directory that should be descended into.
1195    /// Work items for entries that should be skipped or ignored should not
1196    /// be produced.
1197    Work(Work),
1198    /// This instruction indicates that the worker should start quitting.
1199    Quit,
1200}
1201
1202/// A unit of work for each worker to process.
1203///
1204/// Each unit of work corresponds to a directory that should be descended
1205/// into.
1206struct Work {
1207    /// The directory entry.
1208    dent: DirEntry,
1209    /// Any ignore matchers that have been built for this directory's parents.
1210    ignore: Ignore,
1211    /// The root device number. When present, only files with the same device
1212    /// number should be considered.
1213    root_device: Option<u64>,
1214}
1215
1216impl Work {
1217    /// Returns true if and only if this work item is a directory.
1218    fn is_dir(&self) -> bool {
1219        self.dent.is_dir()
1220    }
1221
1222    /// Returns true if and only if this work item is a symlink.
1223    fn is_symlink(&self) -> bool {
1224        self.dent.file_type().map_or(false, |ft| ft.is_symlink())
1225    }
1226
1227    /// Adds ignore rules for parent directories.
1228    ///
1229    /// Note that this only applies to entries at depth 0. On all other
1230    /// entries, this is a no-op.
1231    fn add_parents(&mut self) -> Option<Error> {
1232        if self.dent.depth() > 0 {
1233            return None;
1234        }
1235        // At depth 0, the path of this entry is a root path, so we can
1236        // use it directly to add parent ignore rules.
1237        let (ig, err) = self.ignore.add_parents(self.dent.path());
1238        self.ignore = ig;
1239        err
1240    }
1241
1242    /// Reads the directory contents of this work item and adds ignore
1243    /// rules for this directory.
1244    ///
1245    /// If there was a problem with reading the directory contents, then
1246    /// an error is returned. If there was a problem reading the ignore
1247    /// rules for this directory, then the error is attached to this
1248    /// work item's directory entry.
1249    fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
1250        let readdir = match fs::read_dir(self.dent.path()) {
1251            Ok(readdir) => readdir,
1252            Err(err) => {
1253                let err = Error::from(err)
1254                    .with_path(self.dent.path())
1255                    .with_depth(self.dent.depth());
1256                return Err(err);
1257            }
1258        };
1259        let (ig, err) = self.ignore.add_child(self.dent.path());
1260        self.ignore = ig;
1261        self.dent.err = err;
1262        Ok(readdir)
1263    }
1264}
1265
1266/// A worker is responsible for descending into directories, updating the
1267/// ignore matchers, producing new work and invoking the caller's callback.
1268///
1269/// Note that a worker is *both* a producer and a consumer.
1270struct Worker {
1271    /// The caller's callback.
1272    f: Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static>,
1273    /// The push side of our mpmc queue.
1274    tx: channel::Sender<Message>,
1275    /// The receive side of our mpmc queue.
1276    rx: channel::Receiver<Message>,
1277    /// Whether all workers should quit at the next opportunity. Note that
1278    /// this is distinct from quitting because of exhausting the contents of
1279    /// a directory. Instead, this is used when the caller's callback indicates
1280    /// that the iterator should quit immediately.
1281    quit_now: Arc<AtomicBool>,
1282    /// Whether this worker is waiting for more work.
1283    is_waiting: bool,
1284    /// Whether this worker has started to quit.
1285    is_quitting: bool,
1286    /// The number of workers waiting for more work.
1287    num_waiting: Arc<AtomicUsize>,
1288    /// The number of workers waiting to quit.
1289    num_quitting: Arc<AtomicUsize>,
1290    /// The total number of workers.
1291    threads: usize,
1292    /// The maximum depth of directories to descend. A value of `0` means no
1293    /// descension at all.
1294    max_depth: Option<usize>,
1295    /// The maximum size a searched file can be (in bytes). If a file exceeds
1296    /// this size it will be skipped.
1297    max_filesize: Option<u64>,
1298    /// Whether to follow symbolic links or not. When this is enabled, loop
1299    /// detection is performed.
1300    follow_links: bool,
1301    /// A file handle to skip, currently is either `None` or stdout, if it's
1302    /// a file and it has been requested to skip files identical to stdout.
1303    skip: Option<Arc<Handle>>,
1304}
1305
1306impl Worker {
1307    /// Runs this worker until there is no more work left to do.
1308    ///
1309    /// The worker will call the caller's callback for all entries that aren't
1310    /// skipped by the ignore matcher.
1311    fn run(mut self) {
1312        while let Some(mut work) = self.get_work() {
1313            // If the work is not a directory, then we can just execute the
1314            // caller's callback immediately and move on.
1315            if work.is_symlink() || !work.is_dir() {
1316                if (self.f)(Ok(work.dent)).is_quit() {
1317                    self.quit_now();
1318                    return;
1319                }
1320                continue;
1321            }
1322            if let Some(err) = work.add_parents() {
1323                if (self.f)(Err(err)).is_quit() {
1324                    self.quit_now();
1325                    return;
1326                }
1327            }
1328            let readdir = match work.read_dir() {
1329                Ok(readdir) => readdir,
1330                Err(err) => {
1331                    if (self.f)(Err(err)).is_quit() {
1332                        self.quit_now();
1333                        return;
1334                    }
1335                    continue;
1336                }
1337            };
1338            let descend = if let Some(root_device) = work.root_device {
1339                match is_same_file_system(root_device, work.dent.path()) {
1340                    Ok(true) => true,
1341                    Ok(false) => false,
1342                    Err(err) => {
1343                        if (self.f)(Err(err)).is_quit() {
1344                            self.quit_now();
1345                            return;
1346                        }
1347                        false
1348                    }
1349                }
1350            } else {
1351                true
1352            };
1353
1354            let depth = work.dent.depth();
1355            match (self.f)(Ok(work.dent)) {
1356                WalkState::Continue => {}
1357                WalkState::Skip => continue,
1358                WalkState::Quit => {
1359                    self.quit_now();
1360                    return;
1361                }
1362            }
1363            if !descend {
1364                continue;
1365            }
1366            if self.max_depth.map_or(false, |max| depth >= max) {
1367                continue;
1368            }
1369            for result in readdir {
1370                let state = self.run_one(&work.ignore, depth + 1, work.root_device, result);
1371                if state.is_quit() {
1372                    self.quit_now();
1373                    return;
1374                }
1375            }
1376        }
1377    }
1378
1379    /// Runs the worker on a single entry from a directory iterator.
1380    ///
1381    /// If the entry is a path that should be ignored, then this is a no-op.
1382    /// Otherwise, the entry is pushed on to the queue. (The actual execution
1383    /// of the callback happens in `run`.)
1384    ///
1385    /// If an error occurs while reading the entry, then it is sent to the
1386    /// caller's callback.
1387    ///
1388    /// `ig` is the `Ignore` matcher for the parent directory. `depth` should
1389    /// be the depth of this entry. `result` should be the item yielded by
1390    /// a directory iterator.
1391    fn run_one(
1392        &mut self,
1393        ig: &Ignore,
1394        depth: usize,
1395        root_device: Option<u64>,
1396        result: Result<fs::DirEntry, io::Error>,
1397    ) -> WalkState {
1398        let fs_dent = match result {
1399            Ok(fs_dent) => fs_dent,
1400            Err(err) => {
1401                return (self.f)(Err(Error::from(err).with_depth(depth)));
1402            }
1403        };
1404        let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) {
1405            Ok(dent) => DirEntry::new_raw(dent, None),
1406            Err(err) => {
1407                return (self.f)(Err(err));
1408            }
1409        };
1410        let is_symlink = dent.file_type().map_or(false, |ft| ft.is_symlink());
1411        if self.follow_links && is_symlink {
1412            let path = dent.path().to_path_buf();
1413            dent = match DirEntryRaw::from_path(depth, path, true) {
1414                Ok(dent) => DirEntry::new_raw(dent, None),
1415                Err(err) => {
1416                    return (self.f)(Err(err));
1417                }
1418            };
1419            if dent.is_dir() {
1420                if let Err(err) = check_symlink_loop(ig, dent.path(), depth) {
1421                    return (self.f)(Err(err));
1422                }
1423            }
1424        }
1425        if let Some(ref stdout) = self.skip {
1426            let is_stdout = match path_equals(&dent, stdout) {
1427                Ok(is_stdout) => is_stdout,
1428                Err(err) => return (self.f)(Err(err)),
1429            };
1430            if is_stdout {
1431                return WalkState::Continue;
1432            }
1433        }
1434        let should_skip_path = should_skip_entry(ig, &dent);
1435        let should_skip_filesize = if self.max_filesize.is_some() && !dent.is_dir() {
1436            skip_filesize(
1437                self.max_filesize.unwrap(),
1438                dent.path(),
1439                &dent.metadata().ok(),
1440            )
1441        } else {
1442            false
1443        };
1444
1445        if !should_skip_path && !should_skip_filesize {
1446            self.tx
1447                .send(Message::Work(Work {
1448                    dent: dent,
1449                    ignore: ig.clone(),
1450                    root_device: root_device,
1451                }))
1452                .unwrap();
1453        }
1454        WalkState::Continue
1455    }
1456
1457    /// Returns the next directory to descend into.
1458    ///
1459    /// If all work has been exhausted, then this returns None. The worker
1460    /// should then subsequently quit.
1461    fn get_work(&mut self) -> Option<Work> {
1462        loop {
1463            if self.is_quit_now() {
1464                return None;
1465            }
1466            match self.rx.try_recv() {
1467                Ok(Message::Work(work)) => {
1468                    self.waiting(false);
1469                    self.quitting(false);
1470                    return Some(work);
1471                }
1472                Ok(Message::Quit) => {
1473                    // We can't just quit because a Message::Quit could be
1474                    // spurious. For example, it's possible to observe that
1475                    // all workers are waiting even if there's more work to
1476                    // be done.
1477                    //
1478                    // Therefore, we do a bit of a dance to wait until all
1479                    // workers have signaled that they're ready to quit before
1480                    // actually quitting.
1481                    //
1482                    // If the Quit message turns out to be spurious, then the
1483                    // loop below will break and we'll go back to looking for
1484                    // more work.
1485                    self.waiting(true);
1486                    self.quitting(true);
1487                    while !self.is_quit_now() {
1488                        let nwait = self.num_waiting();
1489                        let nquit = self.num_quitting();
1490                        // If the number of waiting workers dropped, then
1491                        // abort our attempt to quit.
1492                        if nwait < self.threads {
1493                            break;
1494                        }
1495                        // If all workers are in this quit loop, then we
1496                        // can stop.
1497                        if nquit == self.threads {
1498                            return None;
1499                        }
1500                        // Otherwise, spin.
1501                    }
1502                }
1503                Err(_) => {
1504                    self.waiting(true);
1505                    self.quitting(false);
1506                    if self.num_waiting() == self.threads {
1507                        for _ in 0..self.threads {
1508                            self.tx.send(Message::Quit).unwrap();
1509                        }
1510                    } else {
1511                        // You're right to consider this suspicious, but it's
1512                        // a useful heuristic to permit producers to catch up
1513                        // to consumers without burning the CPU. It is also
1514                        // useful as a means to prevent burning the CPU if only
1515                        // one worker is left doing actual work. It's not
1516                        // perfect and it doesn't leave the CPU completely
1517                        // idle, but it's not clear what else we can do. :-/
1518                        thread::sleep(Duration::from_millis(1));
1519                    }
1520                }
1521            }
1522        }
1523    }
1524
1525    /// Indicates that all workers should quit immediately.
1526    fn quit_now(&self) {
1527        self.quit_now.store(true, Ordering::SeqCst);
1528    }
1529
1530    /// Returns true if this worker should quit immediately.
1531    fn is_quit_now(&self) -> bool {
1532        self.quit_now.load(Ordering::SeqCst)
1533    }
1534
1535    /// Returns the total number of workers waiting for work.
1536    fn num_waiting(&self) -> usize {
1537        self.num_waiting.load(Ordering::SeqCst)
1538    }
1539
1540    /// Returns the total number of workers ready to quit.
1541    fn num_quitting(&self) -> usize {
1542        self.num_quitting.load(Ordering::SeqCst)
1543    }
1544
1545    /// Sets this worker's "quitting" state to the value of `yes`.
1546    fn quitting(&mut self, yes: bool) {
1547        if yes {
1548            if !self.is_quitting {
1549                self.is_quitting = true;
1550                self.num_quitting.fetch_add(1, Ordering::SeqCst);
1551            }
1552        } else {
1553            if self.is_quitting {
1554                self.is_quitting = false;
1555                self.num_quitting.fetch_sub(1, Ordering::SeqCst);
1556            }
1557        }
1558    }
1559
1560    /// Sets this worker's "waiting" state to the value of `yes`.
1561    fn waiting(&mut self, yes: bool) {
1562        if yes {
1563            if !self.is_waiting {
1564                self.is_waiting = true;
1565                self.num_waiting.fetch_add(1, Ordering::SeqCst);
1566            }
1567        } else {
1568            if self.is_waiting {
1569                self.is_waiting = false;
1570                self.num_waiting.fetch_sub(1, Ordering::SeqCst);
1571            }
1572        }
1573    }
1574}
1575
1576fn check_symlink_loop(
1577    ig_parent: &Ignore,
1578    child_path: &Path,
1579    child_depth: usize,
1580) -> Result<(), Error> {
1581    let hchild = Handle::from_path(child_path).map_err(|err| {
1582        Error::from(err)
1583            .with_path(child_path)
1584            .with_depth(child_depth)
1585    })?;
1586    for ig in ig_parent
1587        .parents()
1588        .take_while(|ig| !ig.is_absolute_parent())
1589    {
1590        let h = Handle::from_path(ig.path()).map_err(|err| {
1591            Error::from(err)
1592                .with_path(child_path)
1593                .with_depth(child_depth)
1594        })?;
1595        if hchild == h {
1596            return Err(Error::Loop {
1597                ancestor: ig.path().to_path_buf(),
1598                child: child_path.to_path_buf(),
1599            }
1600            .with_depth(child_depth));
1601        }
1602    }
1603    Ok(())
1604}
1605
1606// Before calling this function, make sure that you ensure that is really
1607// necessary as the arguments imply a file stat.
1608fn skip_filesize(max_filesize: u64, path: &Path, ent: &Option<Metadata>) -> bool {
1609    let filesize = match *ent {
1610        Some(ref md) => Some(md.len()),
1611        None => None,
1612    };
1613
1614    if let Some(fs) = filesize {
1615        if fs > max_filesize {
1616            debug!("ignoring {}: {} bytes", path.display(), fs);
1617            true
1618        } else {
1619            false
1620        }
1621    } else {
1622        false
1623    }
1624}
1625
1626fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
1627    let m = ig.matched_dir_entry(dent);
1628    if m.is_ignore() {
1629        debug!("ignoring {}: {:?}", dent.path().display(), m);
1630        true
1631    } else if m.is_whitelist() {
1632        debug!("whitelisting {}: {:?}", dent.path().display(), m);
1633        false
1634    } else {
1635        false
1636    }
1637}
1638
1639/// Returns a handle to stdout for filtering search.
1640///
1641/// A handle is returned if and only if stdout is being redirected to a file.
1642/// The handle returned corresponds to that file.
1643///
1644/// This can be used to ensure that we do not attempt to search a file that we
1645/// may also be writing to.
1646fn stdout_handle() -> Option<Handle> {
1647    let h = match Handle::stdout() {
1648        Err(_) => return None,
1649        Ok(h) => h,
1650    };
1651    let md = match h.as_file().metadata() {
1652        Err(_) => return None,
1653        Ok(md) => md,
1654    };
1655    if !md.is_file() {
1656        return None;
1657    }
1658    Some(h)
1659}
1660
1661/// Returns true if and only if the given directory entry is believed to be
1662/// equivalent to the given handle. If there was a problem querying the path
1663/// for information to determine equality, then that error is returned.
1664fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
1665    #[cfg(unix)]
1666    fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
1667        dent.ino() != Some(handle.ino())
1668    }
1669
1670    #[cfg(not(unix))]
1671    fn never_equal(_: &DirEntry, _: &Handle) -> bool {
1672        false
1673    }
1674
1675    // If we know for sure that these two things aren't equal, then avoid
1676    // the costly extra stat call to determine equality.
1677    if dent.is_stdin() || never_equal(dent, handle) {
1678        return Ok(false);
1679    }
1680    Handle::from_path(dent.path())
1681        .map(|h| &h == handle)
1682        .map_err(|err| Error::Io(err).with_path(dent.path()))
1683}
1684
1685/// Returns true if and only if the given path is on the same device as the
1686/// given root device.
1687fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
1688    let dent_device = device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
1689    Ok(root_device == dent_device)
1690}
1691
1692#[cfg(unix)]
1693fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1694    use std::os::unix::fs::MetadataExt;
1695
1696    path.as_ref().metadata().map(|md| md.dev())
1697}
1698
1699#[cfg(windows)]
1700fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
1701    use winapi_util::{file, Handle};
1702
1703    let h = Handle::from_path_any(path)?;
1704    file::information(h).map(|info| info.volume_serial_number())
1705}
1706
1707#[cfg(not(any(unix, windows)))]
1708fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
1709    Err(io::Error::new(
1710        io::ErrorKind::Other,
1711        "walkdir: same_file_system option not supported on this platform",
1712    ))
1713}
1714
1715#[cfg(test)]
1716mod tests {
1717    use std::fs::{self, File};
1718    use std::io::Write;
1719    use std::path::Path;
1720    use std::sync::{Arc, Mutex};
1721
1722    use super::{DirEntry, WalkBuilder, WalkState};
1723    use tests::TempDir;
1724
1725    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
1726        let mut file = File::create(path).unwrap();
1727        file.write_all(contents.as_bytes()).unwrap();
1728    }
1729
1730    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
1731        let file = File::create(path).unwrap();
1732        file.set_len(size).unwrap();
1733    }
1734
1735    #[cfg(unix)]
1736    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
1737        use std::os::unix::fs::symlink;
1738        symlink(src, dst).unwrap();
1739    }
1740
1741    fn mkdirp<P: AsRef<Path>>(path: P) {
1742        fs::create_dir_all(path).unwrap();
1743    }
1744
1745    fn normal_path(unix: &str) -> String {
1746        if cfg!(windows) {
1747            unix.replace("\\", "/")
1748        } else {
1749            unix.to_string()
1750        }
1751    }
1752
1753    fn walk_collect(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1754        let mut paths = vec![];
1755        for result in builder.build() {
1756            let dent = match result {
1757                Err(_) => continue,
1758                Ok(dent) => dent,
1759            };
1760            let path = dent.path().strip_prefix(prefix).unwrap();
1761            if path.as_os_str().is_empty() {
1762                continue;
1763            }
1764            paths.push(normal_path(path.to_str().unwrap()));
1765        }
1766        paths.sort();
1767        paths
1768    }
1769
1770    fn walk_collect_parallel(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
1771        let mut paths = vec![];
1772        for dent in walk_collect_entries_parallel(builder) {
1773            let path = dent.path().strip_prefix(prefix).unwrap();
1774            if path.as_os_str().is_empty() {
1775                continue;
1776            }
1777            paths.push(normal_path(path.to_str().unwrap()));
1778        }
1779        paths.sort();
1780        paths
1781    }
1782
1783    fn walk_collect_entries_parallel(builder: &WalkBuilder) -> Vec<DirEntry> {
1784        let dents = Arc::new(Mutex::new(vec![]));
1785        builder.build_parallel().run(|| {
1786            let dents = dents.clone();
1787            Box::new(move |result| {
1788                if let Ok(dent) = result {
1789                    dents.lock().unwrap().push(dent);
1790                }
1791                WalkState::Continue
1792            })
1793        });
1794
1795        let dents = dents.lock().unwrap();
1796        dents.to_vec()
1797    }
1798
1799    fn mkpaths(paths: &[&str]) -> Vec<String> {
1800        let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect();
1801        paths.sort();
1802        paths
1803    }
1804
1805    fn tmpdir() -> TempDir {
1806        TempDir::new().unwrap()
1807    }
1808
1809    fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
1810        let got = walk_collect(prefix, builder);
1811        assert_eq!(got, mkpaths(expected), "single threaded");
1812        let got = walk_collect_parallel(prefix, builder);
1813        assert_eq!(got, mkpaths(expected), "parallel");
1814    }
1815
1816    #[test]
1817    fn no_ignores() {
1818        let td = tmpdir();
1819        mkdirp(td.path().join("a/b/c"));
1820        mkdirp(td.path().join("x/y"));
1821        wfile(td.path().join("a/b/foo"), "");
1822        wfile(td.path().join("x/y/foo"), "");
1823
1824        assert_paths(
1825            td.path(),
1826            &WalkBuilder::new(td.path()),
1827            &["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
1828        );
1829    }
1830
1831    #[test]
1832    fn custom_ignore() {
1833        let td = tmpdir();
1834        let custom_ignore = ".customignore";
1835        mkdirp(td.path().join("a"));
1836        wfile(td.path().join(custom_ignore), "foo");
1837        wfile(td.path().join("foo"), "");
1838        wfile(td.path().join("a/foo"), "");
1839        wfile(td.path().join("bar"), "");
1840        wfile(td.path().join("a/bar"), "");
1841
1842        let mut builder = WalkBuilder::new(td.path());
1843        builder.add_custom_ignore_filename(&custom_ignore);
1844        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1845    }
1846
1847    #[test]
1848    fn custom_ignore_exclusive_use() {
1849        let td = tmpdir();
1850        let custom_ignore = ".customignore";
1851        mkdirp(td.path().join("a"));
1852        wfile(td.path().join(custom_ignore), "foo");
1853        wfile(td.path().join("foo"), "");
1854        wfile(td.path().join("a/foo"), "");
1855        wfile(td.path().join("bar"), "");
1856        wfile(td.path().join("a/bar"), "");
1857
1858        let mut builder = WalkBuilder::new(td.path());
1859        builder.ignore(false);
1860        builder.git_ignore(false);
1861        builder.git_global(false);
1862        builder.git_exclude(false);
1863        builder.add_custom_ignore_filename(&custom_ignore);
1864        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1865    }
1866
1867    #[test]
1868    fn gitignore() {
1869        let td = tmpdir();
1870        mkdirp(td.path().join(".git"));
1871        mkdirp(td.path().join("a"));
1872        wfile(td.path().join(".gitignore"), "foo");
1873        wfile(td.path().join("foo"), "");
1874        wfile(td.path().join("a/foo"), "");
1875        wfile(td.path().join("bar"), "");
1876        wfile(td.path().join("a/bar"), "");
1877
1878        assert_paths(
1879            td.path(),
1880            &WalkBuilder::new(td.path()),
1881            &["bar", "a", "a/bar"],
1882        );
1883    }
1884
1885    #[test]
1886    fn explicit_ignore() {
1887        let td = tmpdir();
1888        let igpath = td.path().join(".not-an-ignore");
1889        mkdirp(td.path().join("a"));
1890        wfile(&igpath, "foo");
1891        wfile(td.path().join("foo"), "");
1892        wfile(td.path().join("a/foo"), "");
1893        wfile(td.path().join("bar"), "");
1894        wfile(td.path().join("a/bar"), "");
1895
1896        let mut builder = WalkBuilder::new(td.path());
1897        assert!(builder.add_ignore(&igpath).is_none());
1898        assert_paths(td.path(), &builder, &["bar", "a", "a/bar"]);
1899    }
1900
1901    #[test]
1902    fn explicit_ignore_exclusive_use() {
1903        let td = tmpdir();
1904        let igpath = td.path().join(".not-an-ignore");
1905        mkdirp(td.path().join("a"));
1906        wfile(&igpath, "foo");
1907        wfile(td.path().join("foo"), "");
1908        wfile(td.path().join("a/foo"), "");
1909        wfile(td.path().join("bar"), "");
1910        wfile(td.path().join("a/bar"), "");
1911
1912        let mut builder = WalkBuilder::new(td.path());
1913        builder.standard_filters(false);
1914        assert!(builder.add_ignore(&igpath).is_none());
1915        assert_paths(
1916            td.path(),
1917            &builder,
1918            &[".not-an-ignore", "bar", "a", "a/bar"],
1919        );
1920    }
1921
1922    #[test]
1923    fn gitignore_parent() {
1924        let td = tmpdir();
1925        mkdirp(td.path().join(".git"));
1926        mkdirp(td.path().join("a"));
1927        wfile(td.path().join(".gitignore"), "foo");
1928        wfile(td.path().join("a/foo"), "");
1929        wfile(td.path().join("a/bar"), "");
1930
1931        let root = td.path().join("a");
1932        assert_paths(&root, &WalkBuilder::new(&root), &["bar"]);
1933    }
1934
1935    #[test]
1936    fn max_depth() {
1937        let td = tmpdir();
1938        mkdirp(td.path().join("a/b/c"));
1939        wfile(td.path().join("foo"), "");
1940        wfile(td.path().join("a/foo"), "");
1941        wfile(td.path().join("a/b/foo"), "");
1942        wfile(td.path().join("a/b/c/foo"), "");
1943
1944        let mut builder = WalkBuilder::new(td.path());
1945        assert_paths(
1946            td.path(),
1947            &builder,
1948            &["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
1949        );
1950        assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
1951        assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
1952        assert_paths(
1953            td.path(),
1954            builder.max_depth(Some(2)),
1955            &["a", "a/b", "foo", "a/foo"],
1956        );
1957    }
1958
1959    #[test]
1960    fn max_filesize() {
1961        let td = tmpdir();
1962        mkdirp(td.path().join("a/b"));
1963        wfile_size(td.path().join("foo"), 0);
1964        wfile_size(td.path().join("bar"), 400);
1965        wfile_size(td.path().join("baz"), 600);
1966        wfile_size(td.path().join("a/foo"), 600);
1967        wfile_size(td.path().join("a/bar"), 500);
1968        wfile_size(td.path().join("a/baz"), 200);
1969
1970        let mut builder = WalkBuilder::new(td.path());
1971        assert_paths(
1972            td.path(),
1973            &builder,
1974            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
1975        );
1976        assert_paths(
1977            td.path(),
1978            builder.max_filesize(Some(0)),
1979            &["a", "a/b", "foo"],
1980        );
1981        assert_paths(
1982            td.path(),
1983            builder.max_filesize(Some(500)),
1984            &["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
1985        );
1986        assert_paths(
1987            td.path(),
1988            builder.max_filesize(Some(50000)),
1989            &["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
1990        );
1991    }
1992
1993    #[cfg(unix)] // because symlinks on windows are weird
1994    #[test]
1995    fn symlinks() {
1996        let td = tmpdir();
1997        mkdirp(td.path().join("a/b"));
1998        symlink(td.path().join("a/b"), td.path().join("z"));
1999        wfile(td.path().join("a/b/foo"), "");
2000
2001        let mut builder = WalkBuilder::new(td.path());
2002        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
2003        assert_paths(
2004            td.path(),
2005            &builder.follow_links(true),
2006            &["a", "a/b", "a/b/foo", "z", "z/foo"],
2007        );
2008    }
2009
2010    #[cfg(unix)] // because symlinks on windows are weird
2011    #[test]
2012    fn first_path_not_symlink() {
2013        let td = tmpdir();
2014        mkdirp(td.path().join("foo"));
2015
2016        let dents = WalkBuilder::new(td.path().join("foo"))
2017            .build()
2018            .into_iter()
2019            .collect::<Result<Vec<_>, _>>()
2020            .unwrap();
2021        assert_eq!(1, dents.len());
2022        assert!(!dents[0].path_is_symlink());
2023
2024        let dents = walk_collect_entries_parallel(&WalkBuilder::new(td.path().join("foo")));
2025        assert_eq!(1, dents.len());
2026        assert!(!dents[0].path_is_symlink());
2027    }
2028
2029    #[cfg(unix)] // because symlinks on windows are weird
2030    #[test]
2031    fn symlink_loop() {
2032        let td = tmpdir();
2033        mkdirp(td.path().join("a/b"));
2034        symlink(td.path().join("a"), td.path().join("a/b/c"));
2035
2036        let mut builder = WalkBuilder::new(td.path());
2037        assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
2038        assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
2039    }
2040
2041    // It's a little tricky to test the 'same_file_system' option since
2042    // we need an environment with more than one file system. We adopt a
2043    // heuristic where /sys is typically a distinct volume on Linux and roll
2044    // with that.
2045    #[test]
2046    #[cfg(target_os = "linux")]
2047    fn same_file_system() {
2048        use super::device_num;
2049
2050        // If for some reason /sys doesn't exist or isn't a directory, just
2051        // skip this test.
2052        if !Path::new("/sys").is_dir() {
2053            return;
2054        }
2055
2056        // If our test directory actually isn't a different volume from /sys,
2057        // then this test is meaningless and we shouldn't run it.
2058        let td = tmpdir();
2059        if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
2060            return;
2061        }
2062
2063        mkdirp(td.path().join("same_file"));
2064        symlink("/sys", td.path().join("same_file").join("alink"));
2065
2066        // Create a symlink to sys and enable following symlinks. If the
2067        // same_file_system option doesn't work, then this probably will hit a
2068        // permission error. Otherwise, it should just skip over the symlink
2069        // completely.
2070        let mut builder = WalkBuilder::new(td.path());
2071        builder.follow_links(true).same_file_system(true);
2072        assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
2073    }
2074}