Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/uu/pr/locales/en-US.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,14 @@ pr-help-indent =
pr-help-join-lines =
merge full lines, turns off -W line truncation, no column
alignment, --sep-string[=STRING] sets separators
pr-help-expand-tabs = expand input CHARs (TABs) to tab WIDTH (8)
pr-help-help = Print help information

# Page header text
pr-page = Page

pr-try-help-message = Try 'pr --help' for more information.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other utilities we get this message by using the UError from uucore, which wraps the error message with this text for each utility. For example, this error message: https://github.com/uutils/coreutils/blob/main/src/uu/stty/src/stty.rs#L447 is wrapped with: "Try 'stty --help' for more information."


# Error messages
pr-error-reading-input = pr: Reading from input {$file} gave error
pr-error-unknown-filetype = pr: {$file}: unknown filetype
Expand All @@ -98,3 +101,4 @@ pr-error-no-such-file = pr: cannot open {$file}, No such file or directory
pr-error-column-merge-conflict = cannot specify number of columns when printing in parallel
pr-error-across-merge-conflict = cannot specify both printing across and printing in parallel
pr-error-invalid-pages-range = invalid --pages argument '{$start}:{$end}'
pr-error-invalid-expand-tab-argument ='-e' extra characters or invalid number in the argument: ‘{$arg}’
4 changes: 4 additions & 0 deletions src/uu/pr/locales/fr-FR.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,14 @@ pr-help-indent =
pr-help-join-lines =
fusionner les lignes complètes, désactive la troncature de ligne -W, aucun
alignement de colonne, --sep-string[=CHAÎNE] définit les séparateurs
pr-help-expand-tabs = convertir les CHARs d'entrée (TABs) en largeur de tabulation WIDTH (8)
pr-help-help = Afficher les informations d'aide

# Texte d'en-tête de page
pr-page = Page

pr-try-help-message = Essayez 'pr --help' pour plus d'informations.

# Messages d'erreur
pr-error-reading-input = pr : La lecture depuis l'entrée {$file} a donné une erreur
pr-error-unknown-filetype = pr : {$file} : type de fichier inconnu
Expand All @@ -97,3 +100,4 @@ pr-error-no-such-file = pr : impossible d'ouvrir {$file}, Aucun fichier ou répe
pr-error-column-merge-conflict = impossible de spécifier le nombre de colonnes lors de l'impression en parallèle
pr-error-across-merge-conflict = impossible de spécifier à la fois l'impression transversale et l'impression en parallèle
pr-error-invalid-pages-range = argument --pages invalide '{$start}:{$end}'
pr-error-invalid-expand-tab-argument = Caractères supplémentaires ou nombre invalide dans l'argument de '-e': '{$arg}'
116 changes: 110 additions & 6 deletions src/uu/pr/src/pr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use itertools::Itertools;
use regex::Regex;
use std::fs::metadata;
use std::io::{Read, Write, stderr, stdin, stdout};
use std::str::Utf8Error;
use std::string::FromUtf8Error;
use std::time::SystemTime;
use thiserror::Error;
Expand Down Expand Up @@ -57,6 +58,7 @@ mod options {
pub const JOIN_LINES: &str = "join-lines";
pub const HELP: &str = "help";
pub const FILES: &str = "files";
pub const EXPAND_TABS: &str = "expand-tabs";
}

struct OutputOptions {
Expand All @@ -79,6 +81,7 @@ struct OutputOptions {
join_lines: bool,
col_sep_for_printing: String,
line_width: Option<usize>,
expand_tabs: Option<ExpandTabsOptions>,
}

/// One line of an input file, annotated with file, page, and line number.
Expand All @@ -96,10 +99,24 @@ impl FileLine {
page_number: usize,
line_number: usize,
buf: &[u8],
) -> Result<Self, FromUtf8Error> {
options: &OutputOptions,
) -> Result<Self, PrError> {
// TODO Don't read bytes to String just to directly write them
// out again anyway.
let line_content = String::from_utf8(buf.to_vec())?;
let line_content = if let Some(expand_tabs) = &options.expand_tabs {
// Anticipate a few expandable chars to reduce reallocations
let mut line_content =
String::with_capacity(buf.len() + buf.len() / 20 * expand_tabs.width as usize);
// validate utf correctness
let s = std::str::from_utf8(buf)?;
for b in s.as_bytes() {
apply_expand_tab(&mut line_content, *b, expand_tabs);
}
line_content
} else {
String::from_utf8(buf.to_vec())?
};

Ok(Self {
file_id,
page_number,
Expand All @@ -123,6 +140,21 @@ struct NumberingMode {
first_number: usize,
}

/// Parsed form of the `-e`/`--expand-tabs` option value (`[CHAR][WIDTH]`).
#[derive(Debug)]
struct ExpandTabsOptions {
    /// Input character that is replaced by spaces when encountered.
    input_char: char,
    /// Spacing of the stops that `input_char` is padded out to.
    // NOTE(review): being an `i32`, this can hold zero or negative values;
    // confirm the option parser rejects those before they reach the expander.
    width: i32,
}

impl Default for ExpandTabsOptions {
    /// Expand `TAB` characters to stops that are 8 positions apart.
    fn default() -> Self {
        let input_char = TAB;
        let width = 8;
        Self { input_char, width }
    }
}

impl Default for NumberingMode {
fn default() -> Self {
Self {
Expand All @@ -149,6 +181,14 @@ impl From<FromUtf8Error> for PrError {
}
}

impl From<Utf8Error> for PrError {
    /// Wraps a UTF-8 validation failure into the generic
    /// `EncounteredErrors` variant, carrying the error's display text.
    fn from(err: Utf8Error) -> Self {
        let msg = err.to_string();
        Self::EncounteredErrors { msg }
    }
}

#[derive(Debug, Error)]
enum PrError {
#[error("pr: {msg}")]
Expand Down Expand Up @@ -326,6 +366,14 @@ pub fn uu_app() -> Command {
.action(ArgAction::Append)
.value_hint(clap::ValueHint::FilePath),
)
.arg(
Arg::new(options::EXPAND_TABS)
.long(options::EXPAND_TABS)
.short('e')
.num_args(1)
.value_name("[CHAR][WIDTH]")
.help(translate!("pr-help-expand-tabs")),
)
}

#[uucore::main]
Expand Down Expand Up @@ -390,6 +438,7 @@ fn recreate_arguments(args: &[String]) -> Vec<String> {
let column_page_option = Regex::new(r"^[-+]\d+.*").unwrap();
let num_regex = Regex::new(r"^[^-]\d*$").unwrap();
let n_regex = Regex::new(r"^-n\s*$").unwrap();
let e_regex = Regex::new(r"^-e").unwrap();
let mut arguments = args.to_owned();
let num_option = args.iter().find_position(|x| n_regex.is_match(x.trim()));
if let Some((pos, _value)) = num_option {
Expand All @@ -402,6 +451,17 @@ fn recreate_arguments(args: &[String]) -> Vec<String> {
}
}

// To avoid accidentally consuming the next argument after a bare short flag -e, we insert
// the default values for the -e flag if '-e' is present without direct arguments.
let expand_tabs_option = arguments
.iter()
.find_position(|x| e_regex.is_match(x.trim()));
if let Some((pos, value)) = expand_tabs_option {
if value.trim().len() <= 2 {
arguments[pos] = "-e\t8".to_string();
}
}

arguments
.into_iter()
.filter(|i| !column_page_option.is_match(i))
Expand Down Expand Up @@ -523,6 +583,26 @@ fn build_options(
}
});

let expand_tabs = matches
.get_one::<String>(options::EXPAND_TABS)
.map(|s| {
s.chars().next().map_or(Ok(ExpandTabsOptions::default()), |c| {
if c.is_ascii_digit() {
s
.parse()
.map_err(|_e| PrError::EncounteredErrors { msg: format!("{}\n{}", translate!("pr-error-invalid-expand-tab-argument", "arg" => s), translate!("pr-try-help-message")) })
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.map(|width| ExpandTabsOptions{input_char: TAB, width})
} else if s.len() > 1 {
s[1..]
.parse()
.map_err(|_e| PrError::EncounteredErrors { msg: format!("{}\n{}", translate!("pr-error-invalid-expand-tab-argument", "arg" => &s[1..]), translate!("pr-try-help-message")) })
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.map(|width| ExpandTabsOptions{input_char: c, width})
} else {
Ok(ExpandTabsOptions{input_char: c, width: 8})
}
})
}).transpose()?;

let double_space = matches.get_flag(options::DOUBLE_SPACE);

let content_line_separator = if double_space {
Expand Down Expand Up @@ -759,6 +839,7 @@ fn build_options(
join_lines,
col_sep_for_printing,
line_width,
expand_tabs,
})
}

Expand All @@ -776,6 +857,27 @@ fn read_to_end(path: &str) -> Result<Vec<u8>, std::io::Error> {
}
}

/// Appends one input byte to `chunk`, replacing expandable characters by
/// spaces.
///
/// * A byte equal to `expand_options.input_char` is replaced by the number of
///   spaces needed to reach the next multiple of `expand_options.width`.
/// * A literal `TAB` that is *not* the configured input char is still
///   expanded, but always to stops of 8 (GNU `pr` ignores the user supplied
///   width in that case, e.g. for `-ea1`).
/// * Any other byte is appended as-is.
///
/// The current position within the line is taken to be `chunk.len()`.
///
/// A non-positive `width` is treated as 1 so that this function can neither
/// divide by zero nor request an absurd amount of padding.
fn apply_expand_tab(chunk: &mut String, byte: u8, expand_options: &ExpandTabsOptions) {
    const DEFAULT_TAB_WIDTH: usize = 8;

    // Number of spaces that moves `position` forward to the next stop.
    let spaces_to_next_stop = |position: usize, width: usize| width - (position % width);

    // Only an ASCII input char can ever equal a single byte. Casting a wider
    // char with `as u8` would truncate it and match unrelated bytes
    // (e.g. U+0100 would match NUL).
    let input_char = expand_options.input_char;
    let is_input_char = input_char.is_ascii() && byte == input_char as u8;

    if is_input_char {
        // Clamp to at least 1: a zero width would panic in the remainder
        // below and a negative one would wrap to a huge `usize`.
        let width = expand_options.width.max(1) as usize;
        let spaces_needed = spaces_to_next_stop(chunk.len(), width);
        chunk.extend(std::iter::repeat_n(' ', spaces_needed));
    } else if byte == TAB as u8 {
        // A TAB is expanded even when another input char was requested, but
        // with the default width instead of the user supplied one.
        let spaces_needed = spaces_to_next_stop(chunk.len(), DEFAULT_TAB_WIDTH);
        chunk.extend(std::iter::repeat_n(' ', spaces_needed));
    } else {
        // Neither a TAB nor the char to be expanded: copy it through.
        // NOTE(review): `byte as char` maps bytes >= 0x80 to U+0080..=U+00FF,
        // so multi-byte UTF-8 sequences do not survive this byte-wise
        // interface; fixing that requires the caller to pass chars.
        chunk.push(byte as char);
    }
}

fn pr(path: &str, options: &OutputOptions) -> Result<i32, PrError> {
// Read the entire contents of the file into a buffer.
//
Expand Down Expand Up @@ -805,7 +907,7 @@ fn get_pages(
options: &OutputOptions,
file_id: usize,
buf: &[u8],
) -> Result<Vec<(usize, Vec<FileLine>)>, FromUtf8Error> {
) -> Result<Vec<(usize, Vec<FileLine>)>, PrError> {
let start_page = options.start_page;
let end_page = options.end_page;
let lines_needed_per_page = lines_to_read_for_page(options);
Expand Down Expand Up @@ -840,7 +942,8 @@ fn get_pages(
// If the file has the pattern `\n\f`, don't treat the
// `\f` as its own line; instead ignore the empty line.
} else {
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i])?;
let file_line =
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
page.push(file_line);
}

Expand All @@ -865,7 +968,8 @@ fn get_pages(
// If the file has the pattern `\f\n`, don't treat the
// `\n` as its own line; instead ignore the empty line.
} else {
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i])?;
let file_line =
FileLine::from_buf(file_id, page_num, line_num, &buf[prev..i], options)?;
page.push(file_line);
line_num += 1;
}
Expand All @@ -887,7 +991,7 @@ fn get_pages(

// Consider all trailing bytes as the last line.
if prev < buf.len() {
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..])?;
let file_line = FileLine::from_buf(file_id, page_num, line_num, &buf[prev..], options)?;
page.push(file_line);
}

Expand Down
119 changes: 118 additions & 1 deletion tests/by-util/test_pr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDO) Sdivide
// spell-checker:ignore (ToDO) Sdivide ading

use jiff::{Timestamp, ToSpan};
use regex::Regex;
Expand Down Expand Up @@ -756,3 +756,120 @@ fn test_merge_one_long_one_short() {
.succeeds()
.stdout_matches(&regex);
}

/// A bare `-e` over tab-containing input must succeed and print the page
/// header followed by the expanded body lines.
// NOTE(review): the expected body depends on exact space counts; confirm the
// literal below against the reference output.
#[test]
fn test_simple_expand_tab() {
    // Pieces of the page header that the output regex has to match.
    let whitespace = " ".repeat(50);
    let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d";
    let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n");

    let output_regex = Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap();

    // Input covers a tab in the middle, at the start and at the end of a line.
    new_ucmd!()
        .arg("-e")
        .pipe_in("hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n")
        .succeeds()
        .stdout_matches(&output_regex);
}

/// `-eWIDTH` (digits only) must expand tabs using the given width; the same
/// input is run once per width listed in `test_cases`.
// NOTE(review): the expected bodies depend on exact space counts per width;
// confirm the literals below against the reference output.
#[test]
fn test_simple_expand_tab_with_digit_argument() {
    // Pieces of the page header that every output regex has to match.
    let whitespace = " ".repeat(50);
    let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d";
    let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n");
    let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n";

    // (command line argument, regex the full stdout must match)
    let test_cases = vec![
        ("-e2", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()),
        ("-e3", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()),
        ("-e8", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()),
        ("-e10", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\ntrail \n8chars00 \n")).unwrap()),
    ];
    for (arg, output_regex) in test_cases {
        new_ucmd!()
            .arg(arg)
            .pipe_in(input)
            .succeeds()
            .stdout_matches(&output_regex);
    }
}

/// `-eCHAR` (no width) must expand the given character in addition to
/// literal tabs; checked for the characters `a` and `e`.
// NOTE(review): the expected bodies depend on exact space counts; confirm the
// literals below against the reference output.
#[test]
fn test_simple_expand_tab_with_char_argument() {
    // Pieces of the page header that every output regex has to match.
    let whitespace = " ".repeat(50);
    let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d";
    let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n");
    let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n";

    // (command line argument, regex the full stdout must match)
    let test_cases = vec![
        ("-ea", Regex::new(&format!("{page_1_beginning}hello world\n bc def\n le ding\ntr il \n8ch rs00 \n")).unwrap()),
        ("-ee", Regex::new(&format!("{page_1_beginning}h llo world\nabc d f\n l ading\ntrail \n8chars00 \n")).unwrap()),
    ];
    for (arg, output_regex) in test_cases {
        new_ucmd!()
            .arg(arg)
            .pipe_in(input)
            .succeeds()
            .stdout_matches(&output_regex);
    }
}

/// `-eCHARWIDTH` must expand the given character using the given width.
// NOTE(review): the expected bodies depend on exact space counts; confirm the
// literals below against the reference output.
#[test]
fn test_simple_expand_tab_with_both_arguments() {
    // test different variations of what char to expand
    // a2, e3, t10
    let whitespace = " ".repeat(50);
    let datetime_pattern = r"\d\d\d\d-\d\d-\d\d \d\d:\d\d";
    let page_1_beginning = format!("\n\n{datetime_pattern}{whitespace}Page 1\n\n\n");
    let input = "hello\tworld\nabc\tdef\n\tleading\ntrail\t\n8chars00\t\n";

    // (command line argument, regex the full stdout must match)
    let test_cases = vec![
        ("-ea2", Regex::new(&format!("{page_1_beginning}hello world\n bc def\n le ding\ntr il \n8ch rs00 \n")).unwrap()),
        ("-ee3", Regex::new(&format!("{page_1_beginning}h llo world\nabc d f\n l ading\ntrail \n8chars00 \n")).unwrap()),
        ("-et10", Regex::new(&format!("{page_1_beginning}hello world\nabc def\n leading\n rail \n8chars00 \n")).unwrap()),
    ];
    for (arg, output_regex) in test_cases {
        new_ucmd!()
            .arg(arg)
            .pipe_in(input)
            .succeeds()
            .stdout_matches(&output_regex);
    }
}

/* cSpell:disable */
/// Malformed `-e` values must make `pr` fail with the "extra characters or
/// invalid number" message followed by the "Try 'pr --help'" hint.
#[test]
fn test_invalid_expand_tab_arguments() {
    let test_file_path = "empty_test_file";

    // (command line argument, fragment of it that the error message must quote)
    let test_cases = vec![
        // incorrect argument
        ("-esdgjiojiosdgjiogd", "dgjiojiosdgjiogd"),
        // 2 non digit parameter
        ("-eab", "b"),
        // non digit after first digit
        ("-e1a", "1a"),
        // non digit after first digit after allowed input char
        ("-ea1a", "1a"),
        // > i32 max
        ("-e2147483648", "2147483648"),
        // > i32 max after allowed input char
        ("-ea2147483648", "2147483648"),
    ];

    for (arg, error_msg_field) in test_cases {
        new_ucmd!()
            .args(&[arg, test_file_path])
            .fails()
            .stderr_contains(format!("pr: '-e' extra characters or invalid number in the argument: ‘{error_msg_field}’\nTry 'pr --help' for more information."));
    }
}
/* cSpell:enable */

/// A file operand following `-e`, `-eCHAR` or `-eCHARWIDTH` must still be
/// accepted as a file: each invocation is expected to succeed.
#[test]
fn test_expand_tab_does_not_consume_next_argument() {
    let test_file_path = "empty_test_file";
    // Bare flag, flag with char, flag with char and width.
    new_ucmd!().args(&["-e", test_file_path]).succeeds();
    new_ucmd!().args(&["-ea", test_file_path]).succeeds();
    new_ucmd!().args(&["-ea1", test_file_path]).succeeds();
}
Empty file.
Loading