Skip to content

Commit

Permalink
Merge branch 'release/v0.2' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
robfs committed Mar 9, 2022
2 parents aff0e34 + 3defa94 commit c7ca83b
Show file tree
Hide file tree
Showing 27 changed files with 1,211 additions and 320 deletions.
Binary file added .DS_Store
Binary file not shown.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
*.jl.*.cov
*.jl.cov
*.jl.mem
/Manifest.toml
/docs/build/
.env
45 changes: 45 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Documentation: http://docs.travis-ci.com/user/languages/julia
language: julia
notifications:
email: false
julia:
- 1.7
- nightly
os:
- linux
arch:
- x64
cache:
directories:
- ~/.julia/artifacts
jobs:
fast_finish: true
allow_failures:
- julia: nightly
include:
- stage: Documentation
julia: 1
script: |
julia --project=docs -e '
using Pkg
Pkg.develop(PackageSpec(path=pwd()))
Pkg.instantiate()
using Documenter: DocMeta, doctest
using XbrlXML
DocMeta.setdocmeta!(XbrlXML, :DocTestSetup, :(using XbrlXML); recursive=true)
doctest(XbrlXML)
include("docs/make.jl")'
after_success: skip
after_success:
- |
julia -e '
using Pkg
Pkg.add("Coverage")
using Coverage
Codecov.submit(process_folder())'
- |
julia -e '
using Pkg
Pkg.add("Coverage")
using Coverage
Coveralls.submit(process_folder())'
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "XbrlXML"
uuid = "bd8e2f07-ddbe-4e10-a07d-d9c59cd9946f"
authors = ["Rob <[email protected]> and contributors"]
version = "0.1.2"
version = "0.2.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -14,10 +14,10 @@ ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[compat]
EzXML = "1.1.0"
HTTP = "0.9.17"
LRUCache = "1.3.0"
Memoize = "0.4.4"
ZipFile = "0.9.4"
HTTP = "0.9.17"
julia = "1.7"

[extras]
Expand Down
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
# XbrlXML.jl

This is a pure `Julia` implementation of the [`py-xbrl`](https://pypi.org/project/py-xbrl/) python package using [`EzXML.jl`](https://juliapackages.com/p/ezxml) to parse the raw XML.
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://robfs.github.io/XbrlXML.jl/stable)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://robfs.github.io/XbrlXML.jl/dev)
[![Build Status](https://travis-ci.com/robfs/XbrlXML.jl.svg?branch=main)](https://travis-ci.com/robfs/XbrlXML.jl)
[![Coverage](https://codecov.io/gh/robfs/XbrlXML.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/robfs/XbrlXML.jl)
[![Coverage](https://coveralls.io/repos/github/robfs/XbrlXML.jl/badge.svg?branch=main)](https://coveralls.io/github/robfs/XbrlXML.jl?branch=main)

See `Python` documentation for now - docstrings and documentation being written.
This is a pure Julia implementation of the [`py-xbrl`](https://pypi.org/project/py-xbrl/) Python package. [`EzXML.jl`](https://juliapackages.com/p/ezxml) is used to parse the raw XML.

For now, refer to the `py-xbrl` Python documentation; docstrings and documentation for this package are still being written.

```julia
using XbrlXML

cache = HttpCache("./cache")
cache.headers = Dict("User-Agent" => "Your Name, [email protected]")
cache = HttpCache()
cacheheader!(cache, "User-Agent" => "You, [email protected]")

url = "https://www.sec.gov/Archives/edgar/data/0000789019/000156459021002316/msft-10q_20201231.htm"

xbrl_instance = parse_instance(cache, url);
xbrlinstance = parseinstance(cache, url)
```

Binary file added docs/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
30 changes: 30 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using Documenter

using Pkg
# Activate the package project, i.e. the parent of this `docs/` directory.
# The path is anchored to this file with `@__DIR__` because a bare relative path
# such as ".." is resolved against the current working directory, which is not
# `docs/` when this script is included from the repository root.
Pkg.activate(joinpath(@__DIR__, ".."))

using XbrlXML

# Build the HTML documentation for the XbrlXML module.
makedocs(
    modules=[XbrlXML],
    authors="Rob <[email protected]> and contributors",
    repo="https://github.com/robfs/XbrlXML.jl/blob/{commit}{path}#{line}",
    sitename = "XbrlXML.jl",
    format=Documenter.HTML(;
        # Pretty URLs are enabled only when the `CI` environment variable is "true".
        prettyurls=get(ENV, "CI", "false") == "true",
        canonical="https://robfs.github.io/XbrlXML.jl",
        assets=String[],
    ),
    pages=[
        "Home" => "index.md",
        "Cache" => "cache.md",
        "Instance" => "instance.md",
        "Linkbases" => "linkbases.md",
        "Taxonomy" => "taxonomy.md",
    ],
)

# Deploy the built documentation; `main` is treated as the development branch.
deploydocs(;
    repo="github.com/robfs/XbrlXML.jl",
    devbranch="main",
)
7 changes: 7 additions & 0 deletions docs/src/cache.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Cache

```@autodocs
Modules = [XbrlXML.Cache]
Order = [:module, :type, :function]
Pages = ["Cache.jl"]
```
5 changes: 5 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# XbrlXML.jl

```@contents
```

7 changes: 7 additions & 0 deletions docs/src/instance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Instance

```@autodocs
Modules = [XbrlXML.Instance]
Order = [:type, :function]
Pages = ["Instance.jl"]
```
7 changes: 7 additions & 0 deletions docs/src/linkbases.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Linkbases

```@autodocs
Modules = [XbrlXML.Linkbases]
Order = [:type, :function]
Pages = ["Linkbases.jl"]
```
7 changes: 7 additions & 0 deletions docs/src/taxonomy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Taxonomy

```@autodocs
Modules = [XbrlXML.Taxonomy]
Order = [:type, :function]
Pages = ["Taxonomy.jl"]
```
13 changes: 0 additions & 13 deletions quicktest.jl

This file was deleted.

117 changes: 99 additions & 18 deletions src/Cache.jl
Original file line number Diff line number Diff line change
@@ -1,60 +1,141 @@
"""
Provides interface to local store of files used for parsing XBRL.
"""
module Cache

using Downloads
using ZipFile

export HttpCache, cache_file
export HttpCache
export cacheheader!, cacheheaders!, cacheheaders, cachedir
export cachefile, purgefile, urltopath, cache_edgar_enclosure

"""
HttpCache(cache_dir="./cache/", headers=Dict())
Create a cache to store files locally for reuse.
`headers` are passed to http `Downloads.download`. Services such as SEC require you to
disclose information about your application.
# Example
```julia-repl
julia> cache = HttpCache("/Users/user/cache/")
/Users/user/cache/
julia> cacheheader!(cache, "User-Agent" => "You [email protected]")
Dict{AbstractString, AbstractString} with 1 entry:
"User-Agent" => "You [email protected]"
```
"""
mutable struct HttpCache
cache_dir::AbstractString
headers::Dict{AbstractString, AbstractString}
cachedir::String
headers::Dict{String, String}

HttpCache(cache_dir="./cache/", headers=Dict{String,String}()) = new(
endswith(cache_dir, "/") ? cache_dir : cache_dir * "/",
HttpCache(cachedir="./cache/", headers=Dict()) = new(
endswith(cachedir, "/") ? cachedir : cachedir * "/",
headers
)

end

function cache_file(cache::HttpCache, file_url::AbstractString)::AbstractString
"""
    cachedir(cache::HttpCache)::String

Return the local directory path stored in `cache`.
"""
function cachedir(cache::HttpCache)::String
    return cache.cachedir
end

"""
    cacheheaders(cache::HttpCache)::Dict{String,String}

Return the header dictionary stored in `cache`.
"""
function cacheheaders(cache::HttpCache)::Dict{String,String}
    return cache.headers
end

# Display a cache as the absolute path of its directory.
function Base.show(io::IO, c::HttpCache)
    print(io, abspath(cachedir(c)))
    return nothing
end

"""
    cacheheader!(cache::HttpCache, header::Pair)::Dict{String,String}

Set a single header on `cache` and return the cache's complete header dictionary.

`header` is a `name => value` pair of strings. If a header with the same name is
already stored, its value is replaced by the new one.
"""
function cacheheader!(
    cache::HttpCache, header::Pair{<:AbstractString,<:AbstractString}
)::Dict{String,String}
    # Plain assignment instead of `get!`: `get!` only stores the value when the key
    # is absent, so a later call with a new value for the same header was ignored.
    cache.headers[String(first(header))] = String(last(header))
    return cacheheaders(cache)
end

"""
    cacheheaders!(cache::HttpCache, headers::Vector{Pair{String,String}})::Dict{String,String}

Add each `name => value` pair in `headers` to `cache` via [`cacheheader!`](@ref) and
return the cache's complete header dictionary.

An empty `headers` vector leaves the cache unchanged and still returns its headers.
"""
function cacheheaders!(
    cache::HttpCache, headers::Vector{Pair{String,String}}
)::Dict{String,String}
    for header in headers
        cacheheader!(cache, header)
    end
    # Return the headers explicitly; a trailing `for` loop evaluates to `nothing`.
    return cacheheaders(cache)
end

"""
cachefile(cache::HttpCache, file_url)::String
Save a file located at `file_url` to a local cache.
"""
function cachefile(cache::HttpCache, file_url::String)::String

file_path::AbstractString = url_to_path(cache, file_url)
file_path::String = urltopath(cache, file_url)

isfile(file_path) && return file_path

file_dir_path::AbstractString = join(split(file_path, "/")[1:end-1], "/")

mkpath(file_dir_path)

Downloads.download(file_url, file_path; headers=cache.headers)
Downloads.download(file_url, file_path; headers=cacheheaders(cache))

return file_path

end

function purge_file(cache::HttpCache, file_url::AbstractString)::Bool
"""
purgefile(cache::HttpCache, file_url)::Bool
Remove a file, based on its URL, from a local cache.
"""
function purgefile(cache::HttpCache, file_url::String)::Bool
try
rm(url_to_path(cache, file_url))
rm(urltopath(cache, file_url))
catch
return false
end
return true
end

function url_to_path(cache::HttpCache, url::AbstractString)::AbstractString
rep::Pair{Regex, AbstractString} = r"https?://" => ""
return cache.cache_dir * replace(url, rep)
"""
urltopath(cache::HttpCache, url)::String
Convert a file's `url` to a local cache file.
"""
function urltopath(cache::HttpCache, url::String)::String
rep::Pair{Regex, String} = r"https?://" => ""
return cachedir(cache) * replace(url, rep)
end

function cache_edgar_enclosure(cache::HttpCache, enclosure_url::AbstractString)
"""
cache_edgar_enclosure(cache::HttpCache, enclosure_url)
"""
function cache_edgar_enclosure(cache::HttpCache, enclosure_url::String)::String

if endswith(enclosure_url, ".zip")

enclosure_path::AbstractString = cache_file(cache, enclosure_url)
enclosure_path::AbstractString = cachefile(cache, enclosure_url)

parent_path::AbstractString = join(split(enclosure_url, "/")[1:end-1], "/")

submission_dir_path::AbstractString = url_to_path(cache, parent_path)
submission_dir_path::String = urltopath(cache, parent_path)

r::ZipFile.Reader = ZipFile.Reader(enclosure_path)

Expand All @@ -71,7 +152,7 @@ function cache_edgar_enclosure(cache::HttpCache, enclosure_url::AbstractString)
return submission_dir_path
end

function find_entry_file(cache::HttpCache, dir::AbstractString)::Union{AbstractString,Nothing}
function find_entry_file(cache::HttpCache, dir::String)::Union{String,Nothing}

valid_files::Vector{AbstractString} = []

Expand Down Expand Up @@ -104,7 +185,7 @@ function find_entry_file(cache::HttpCache, dir::AbstractString)::Union{AbstractS
sort!(entry_candidates; by=x -> x[2], rev=true)

if length(entry_candidates) > 0
(file_path, size) = entry_candidates[1]
(file_path::String, size) = entry_candidates[1]
return file_path
end

Expand Down
Loading

0 comments on commit c7ca83b

Please sign in to comment.