Skip to content

Commit a8d75c1

Browse files
vlib: add archive.tar module to enable reading of .tar and .tar.gz files (#24995)
1 parent b876644 commit a8d75c1

13 files changed

Lines changed: 969 additions & 0 deletions

File tree

‎examples/archive/tar_gz_reader.v‎

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import archive.tar
2+
import flag
3+
import net.http
4+
import os
5+
import term
6+
7+
const default_url = 'https://github.com/vlang/v/archive/refs/tags/v0.1.3.tar.gz'
8+
9+
// Context holds the command line options of the tar_gz_reader example.
// It is built once by new_context() and shared by reference with the reader.
@[heap]
10+
struct Context {
11+
url string // archive location: http:// or https:// is downloaded; file:/// is read from a path relative to the home dir
12+
chunks bool // true: main() decompresses with read_chunks(); false: with read_all()
13+
debug int // verbosity threshold: >0 prints other blocks, >1 also dirs, >2 also files, >3 also data blocks
14+
max_blocks int // if max_blocks > 0 and the block number exceeds it, reading stops early.
15+
filename string // if non-empty, the first file whose path ends with it is collected; reading stops once its data is complete.
16+
}
17+
18+
// read_last_block checks the optional block limit of the context.
// When `max_blocks` is positive and the current block number of `read` is
// greater than it, the read is flagged with `stop_early` and true is returned.
// Otherwise nothing is modified and false is returned.
fn (ctx &Context) read_last_block(mut read tar.Read) bool {
	// the block number is only queried when a limit was actually configured
	limit_exceeded := ctx.max_blocks > 0 && read.get_block_number() > ctx.max_blocks
	if !limit_exceeded {
		return false
	}
	read.stop_early = true
	return true
}
25+
26+
// new_context parses the process arguments (os.args) with a flag parser and
// returns the resulting Context. Arguments left over after parsing are only
// reported on stdout. An error from finalizing the parser is propagated.
fn new_context() !&Context {
	mut parser := flag.new_flag_parser(os.args)
	parser.application('tar_gz_reader')
	parser.version('0.0.20250721')
	parser.description('Reads into memory selected sections of *.tar.gz. archives from https or home_dir.')
	parser.skip_executable()

	// the flags are declared in the same order as the Context fields
	opt_url := parser.string('url', `u`, default_url, 'archive *.tar.gz URL, default(${default_url}). Start name with file:/// for local')
	opt_chunks := parser.bool('chunks', `c`, false, 'decompress with chunks to reduce RAM usage, default(false)')
	opt_debug := parser.int('debug', `d`, 0, 'prints blocks: 1=other, 2:+dirs, 3=+files, 4=+data, default(0=silent)')
	opt_max_blocks := parser.int('max_blocks', `m`, 0, 'maximum blocks to read, stop early. Default(0=read all)')
	opt_filename := parser.string('filename', `f`, '', 'filename content complete print, stop early. Default(empty means none)')

	leftover := parser.finalize()!
	if leftover.len > 0 {
		println('unprocessed args ${leftover.join_lines()}')
	}
	return &Context{
		url:        opt_url
		chunks:     opt_chunks
		debug:      opt_debug
		max_blocks: opt_max_blocks
		filename:   opt_filename
	}
}
45+
46+
// Downloader keeps in memory the bytes of a *.tar.gz archive. It is handed to
// the http module through http.DownloaderParams and receives the HTTP chunks
// in its on_chunk callback; for local files new_downloader fills `data` directly.
47+
struct Downloader {
48+
mut:
49+
chunks int // number of chunks appended by on_chunk
50+
data []u8 // archive bytes, appended chunk by chunk or read from a local file
51+
}
52+
53+
// new_downloader loads the archive found at `url` completely into memory and
// returns the Downloader holding its bytes.
// - `http://` and `https://` URLs are fetched with http.download_file_with_progress,
//   using the returned Downloader as the receiver of the progress callbacks.
// - `file:///` URLs are read from disk; the part after the prefix is resolved
//   relative to the home directory of the user.
// Any other URL is rejected with an error, instead of silently returning an
// empty Downloader that would only fail later while decompressing.
// Download and file reading errors are propagated to the caller.
fn new_downloader(url string) !&Downloader {
	mut downloader := &Downloader{}
	if url.starts_with('http://') || url.starts_with('https://') {
		params := http.DownloaderParams{
			downloader: downloader
		}
		http.download_file_with_progress(url, '', params)!
	} else if url.starts_with('file:///') {
		// 8 == 'file:///'.len, the remainder is a path relative to the home dir
		path := '${os.home_dir()}/${url[8..]}'
		println('path ${path}')
		downloader.data = os.read_bytes(path)!
	} else {
		return error('unsupported url `${url}`: it must start with http://, https:// or file:///')
	}
	return downloader
}
67+
68+
// on_start is one of the three callbacks (on_start, on_chunk, on_finish) that
// this Downloader offers to the http module. Nothing has to be prepared in
// this example, so the body is intentionally empty.
fn (mut d Downloader) on_start(mut request http.Request, path string) ! {
}
69+
70+
// on_chunk appends `chunk` to the bytes collected so far and counts it.
// Chunks reported with `expected == 0` are ignored completely: they are
// neither stored nor counted.
fn (mut d Downloader) on_chunk(request &http.Request, chunk []u8, already_received u64, expected u64) ! {
	if expected != 0 {
		d.data << chunk
		d.chunks++
	}
}
77+
78+
// on_finish is the last of the Downloader callbacks. All the data was already
// collected by on_chunk, so there is nothing left to do here.
fn (mut d Downloader) on_finish(request &http.Request, response &http.Response) ! {
}
79+
80+
// FileReader is the tar.Reader implementation of this example. Its block
// callbacks print the visited blocks according to ctx.debug and collect the
// content of the file selected with ctx.filename.
struct FileReader implements tar.Reader {
81+
ctx &Context // shared command line options
82+
mut:
83+
filepath string // path of the first file block that ended with ctx.filename; empty until found
84+
content []u8 // data blocks accumulated for `filepath`
85+
}
86+
87+
// new_file_reader returns a FileReader bound to `ctx`, with an empty
// `filepath` and no collected `content`.
fn new_file_reader(ctx &Context) FileReader {
	reader := FileReader{
		ctx: ctx
	}
	return reader
}
92+
93+
// other_block handles the blocks that are neither dir, file nor data blocks.
// It first applies the block limit of the context, and then prints a bright
// yellow row when the debug level is greater than 0.
fn (mut f FileReader) other_block(mut read tar.Read, details string) {
	stopped := f.ctx.read_last_block(mut read)
	if stopped || f.ctx.debug <= 0 {
		return
	}
	line := 'OTHER block:${read.get_block_number():6} ${read.get_special()} ${details} ${read.get_path()} '
	println(term.colorize(term.bright_yellow, line))
}
102+
103+
// dir_block handles a directory block. It first applies the block limit of
// the context, and then prints a green row when the debug level is greater
// than 1.
fn (mut f FileReader) dir_block(mut read tar.Read, size u64) {
	stopped := f.ctx.read_last_block(mut read)
	if stopped || f.ctx.debug <= 1 {
		return
	}
	line := 'DIR block:${read.get_block_number():6} ${read.get_path()} size:${size}'
	println(term.colorize(term.green, line))
}
112+
113+
// file_block handles a file header block. After applying the block limit of
// the context, it prints a bright blue row when the debug level is greater
// than 2. If a filename was requested and no file was selected yet, the path
// of this block is remembered in `filepath` when it ends with that filename.
fn (mut f FileReader) file_block(mut read tar.Read, size u64) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	path := read.get_path()
	if f.ctx.debug > 2 {
		line := ' FILE block:${read.get_block_number():6} ${path} size:${size}'
		println(term.colorize(term.bright_blue, line))
	}
	// only the first matching file is selected
	wanted := f.ctx.filename
	if wanted == '' || f.filepath != '' {
		return
	}
	if path.ends_with(wanted) {
		f.filepath = path
	}
}
126+
127+
// data_block handles a block of file content. After applying the block limit
// of the context, it prints a row when the debug level is greater than 3.
// When a filename was requested and the path of the block is the selected
// `filepath`, the data is appended to `content`; once `pending` is 0 the file
// is complete and the read is flagged to stop early.
fn (mut f FileReader) data_block(mut read tar.Read, data []u8, pending int) {
	if f.ctx.read_last_block(mut read) {
		return
	}
	path := read.get_path()
	if f.ctx.debug > 3 {
		println(' DATA block:${read.get_block_number():6} ${path} len:${data.len} pend:${pending}')
	}
	// nothing to collect without a requested filename or for other files
	if f.ctx.filename == '' || f.filepath != path {
		return
	}
	f.content << data
	if pending == 0 {
		// the data of our file of interest is complete
		read.stop_early = true
	}
}
145+
146+
// main wires the example together: it parses the options, builds the
// reader -> untar -> decompressor chain, loads the archive bytes, decompresses
// them (by chunks or all at once, depending on the options) and finally
// prints a summary followed by the content collected by the reader.
fn main() {
	ctx := new_context()!
	reader := FileReader{
		ctx: ctx
	}
	mut untar := tar.new_untar(reader)
	mut decompressor := tar.new_decompresor(untar)
	downloader := new_downloader(ctx.url)!
	archive_bytes := downloader.data
	if ctx.chunks {
		decompressor.read_chunks(archive_bytes)!
	} else {
		decompressor.read_all(archive_bytes)!
	}

	separator := '-'.repeat(80)
	println(separator)
	println('Download: ${ctx.url} chunks:${downloader.chunks} bytes=${archive_bytes.len}')
	println('Untar: ${untar}')
	println('Content: Path:${reader.filepath} bytes:${reader.content.len}')
	println(separator)
	println(reader.content.bytestr())
	println(separator)
}

‎vlib/archive/README.md‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## Description
2+
3+
`archive` is a namespace for different archive formats like `tar` or `zip`.

‎vlib/archive/tar/README.md‎

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
## Description
2+
3+
`tar` is a module to access tar archives.
4+
5+
Tape archives (tar) are a file format for storing a sequence of files that can be read and written
6+
as streams. This module covers the reading of the basic sections of archives produced by GNU tools
7+
like Linux command `tar -xvf` but in memory instead of modifying the filesystem. Parses directories,
8+
files, and file contents, and manages paths longer than 100 chars.
9+
10+
### Read Efficiency
11+
12+
An entire tar file can be read in memory or by chunks. Reading by chunks keeps in memory a single decompressed
13+
[chunk](https://modules.vlang.io/compress.gzip.html#decompress_with_callback) of 32 KB at a time
14+
and also keeps in memory a single tar block of 512 bytes at a time. Paths are not converted to strings until
15+
needed, and the user's reader implementation can stop the reading process early.
16+
17+
### Read Example
18+
19+
The tar blocks are parsed and some fields are passed to `Reader` implemented methods.
20+
21+
```v
22+
import os
23+
import archive.tar
24+
25+
fn main() {
26+
os.chdir(@VMODROOT) or {}
27+
path := 'archive/tar/testdata/life.tar.gz'
28+
reader := tar.new_debug_reader()
29+
tar.read_tar_gz_file(path, reader)!
30+
}
31+
```
32+
See also the `tar_gz_reader.v` program in the `examples` folder.
33+

0 commit comments

Comments
 (0)