# Last edited on 2017-05-14 23:15:24 by stolfilocal GATHERING PRICE AND VOLUME DATA # Collected transaction data from some Bitcoin exchanges from the # site "http://bitcoincharts.com/charts/", summarized with various time steps. # The files were cleaned with {cleanup_bitcoincharts_data.sh} # and then with {check_and_fix_price_files.sh} (see below). # Files are in the "fix" subdirectory. # The typical file name is # "{TDLO}--{TDHI}-{EXCHANGE}-{CURRENCY}-{TIMESTEP}.txt" where # {TDLO} and {TDHI} are date-times in the format "%Y-%m-%d-%H%M%S", # {EXCHANGE} is the exchange's 4-character tag, {CURRENCY} is the # conversion currency (USD, EUR, etc.) and {TIMESTEP} is "01m", "05m", # "01h", "01d", etc. # # The "%H%M%S" can be omitted in both dates, and the # "%Y-" and possibly "%m-" can be omitted from {TDHI} if they are the # same as in {TDLO}. All dates and times, in the file names and # contents, are UTC. # # The data file includes all synchronized intervals of length {TIMESTEP} that # intersect the range {TDLO} to {TDHI} inclusive. # The file may include one additional interval at each end, # for context. SUBDIRECTORIES # raw data copy-pasted from bitcoincharts site. # org data files with cleaned-up format # edt data files merged by hand # fix data files with corrected rounding, ranges, etc. COLLECTING DATA FROM BITCOINCHARTS # To collect daily summary data from bitcoincharts.com, I copy the # "download data" of some relevant exchange/currency pairs ${ex}/${cr} # (see ../volumes/relevant-ex-cr-pairs.dir) # to a single file called ".new", with the lines of each # pair preceded by 70 "~"s and "#FILE ${date1}--${date2}-${ex}-${cr}-01d-raw.txt" # Then I run splitsep < .new # That puts each pair in a separate file. Then I create a list of exchange-currency pairs: cat ../../exchanges-list.txt \ | gawk '/^[ ]*([\#]|$)/{ next; } ((NF == 7) && ($2 == "|")){ print ($5 "." 
$7); }' \ > .ex-cr-pairs # Then I do one of these: # If all date intervals are the same: cleanup_all_bitcoinchart_raw_files.sh 2014-09-26 2014-11-27 01d # If the date intervals are not the same: fnames=( `ls raw/*--*-[A-Z]???-[A-Z]??-[0-9]?[hdmw].txt | sed -e 's:^raw/::g' -e 's:[.]txt::g'` ) for fname in "${fnames[@]}" ; do excr="${fname%%-[0-9][0-9][mhdw]}" excr="${excr##*[0-9][0-9]-}" ex="${excr%%-*}" cr="${excr##*-}" rawfile="raw/${fname}.txt" orgfile="org/${fname}.txt" cleanup_bitcoincharts_data.sh "${ex}" "${cr}" "${rawfile}" \ > ${orgfile} echo "=== ${fname} ex = ${ex} cr = ${cr} ===" # cat ${orgfile} done EDITING FILES BY HAND # Concatenated all files by exchange, currency, time-interval: combine_files_ex_cr_tu.sh org # This created a bunch of catsep'ed files called # "org/.{EX}-{CR}-{TU}.txt". Edited each of these files by hand so as # to join the various overlapping data files with the same tag # ({EX}-{CR}-{TU}) into one single file. # # Most tags had just one date-time range. The exceptions were some # sample files with short time intervals. # # Moved these catsep'ed files to the "edt" subdir. # # Ran ( for ff in .[A-Z]???-???-???.txt; do printf "$ff " ; \ ../extract_date_time_range.sh < $ff ; \ done ) > .rename # Then edited ".rename" into a bunch of "mv" commands # that renamed each of those files as "{DATEINI}--{DATEFIN}-{EX}-{CR}-{TU}.txt" # for {TU} = "01d", and "{DATEINI}-{TIMEINI}--{DATEFIN}-{TIMEFIN}-{EX}-{CR}-{TU}.txt" # for other {TU}. # # Removed the "#FILE" and "~~~" lines with "sed-files". # # Checking whether the files in "edt" cover all in "org" # and "org/JUNK": check_dir_covers_dir.sh edt org # Since the data files in "org" are all covered by the files in "edt", # moved all data files in "org" to "org/JUNK". 
PLOTTING INTERVALS SPANNED # Plotting the time intervals spanned by the various files: # # Separate plots for each {ex}-{cr}-{tu} combination: show_all_date_intervals.sh edt 1 1 1 # Single plot with all files: show_all_date_intervals.sh edt 0 0 0 # Specific plot: which=edt ../show_date_intervals.sh \ ${which} "BSTP" "USD" "01h" \ "Series files for BSTP - USD per BTC - 01h intervals" \ 2010-06-15 2017-07-15 \ > plots/date-intervals-edt-BSTP-USD-01h-a.png CHECKING VALUES # Checking and fixing the consistency of prices and volumes: check_and_fix_price_files.sh 60 `ls edt/20*-01m.txt` check_and_fix_price_files.sh 300 `ls edt/20*-05m.txt` check_and_fix_price_files.sh 900 `ls edt/20*-15m.txt` check_and_fix_price_files.sh 3600 `ls edt/20*-01h.txt` check_and_fix_price_files.sh 43200 `ls edt/20*-12h.txt` check_and_fix_price_files.sh 86400 `ls edt/20*-01d.txt` # Checking and fixing a single file: check_and_fix_price_files.sh 86400 edt/2014-01-07--2017-05-14-KRAK-USD-01d.txt CHECK DATE/TIME RANGES for ff in `cat .this-lot` ; do echo "== ${ff} ==" head -6 fix/${ff} echo "..." 
tail -3 fix/${ff} done PLOTTING PRICES # Plotting some: plot_prices.sh \ "2013-12-08 (1 min)" \ '(1/24.0)' 6 \ 2013-12-08 2013-12-08 \ 650 1250 YES \ fix/2013-12-08-0000--2013-12-09-0000-MGOX-USD-01m.txt 16 "MtGOX" 1.000 0066aa \ fix/2013-12-08-0000--2013-12-09-0000-BTCE-USD-01m.txt 16 "BTC-e" 1.000 338800 \ > plots/foo.png plot_prices.sh \ "2013-12-13 to 2013-12-18 (5 min)" \ '(6/24.0)' 6 \ 2013-12-13 2013-12-18 \ 610 1250 YES \ fix/2013-12-13-0000--2013-12-18-0000-BTCC-CNY-05m.txt 16 "BTC-China" 5.300 aa0000 \ fix/2013-12-17-0000--2013-12-18-0025-BSTP-USD-05m.txt 16 "Bitstamp" 0.950 3311ff \ fix/2013-12-17-0000--2013-12-18-0040-MGOX-USD-05m.txt 16 "MtGOX" 1.000 0066aa \ > plots/foo.png plot_prices.sh \ "2013-07-01 to 2014-05-02 (hourly)" \ 28 4 \ 2013-07-01 2014-05-02 \ 10 1350 YES \ fix/2013-08-31-2300--2014-01-18-0000-BTCC-CNY-01h.txt 16 "BTC-China" 5.850 aa0000 \ fix/2013-08-31-2300--2014-04-29-0000-BTCE-USD-01h.txt 16 "BTC-e" 1.000 338800 \ fix/2012-10-31-2300--2014-10-17-0000-BSTP-USD-01h.txt 16 "Bitstamp" 0.950 3311ff \ fix/2013-08-31-2300--2014-02-25-0100-MGOX-USD-01h.txt 16 "MtGOX" 1.000 0066aa \ > plots/foo.png plot_prices.sh \ "2013-11-01 to 2013-12-01 (12 hours)" \ 1 6 \ 2013-10-29 2013-12-03 \ 205 1350 YES \ fix/2013-11-01-0000--2013-12-01-0000-BTCC-CNY-12h.txt 16 "BTC-China" 5.850 aa0000 \ fix/2013-11-01-0000--2013-12-01-0000-BSTP-USD-12h.txt 16 "Bitstamp" 0.926 3311ff \ fix/2013-11-01-0000--2013-12-01-0000-MGOX-USD-12h.txt 16 "MtGOX" 1.000 0066aa \ > plots/foo.png plot_prices_old.sh \ "2013-12-16 to 2013-12-20 (15 min)" \ '(0.25/24.0)' '(+16.0)' 4 \ 340 1100 \ fix/2013-12-16--2013-12-20-BTCC-CNY-15m.txt "BTC-China" 5.600 \ fix/2013-12-16--2013-12-20-BSTP-USD-15m.txt "Bitstamp" 0.926 \ fix/2013-12-16--2013-12-20-MGOX-USD-15m.txt "MtGOX" 1.000 \ > plots/2013-12-16--12-20-BTCC5600-BSTP-MGOX-15m.png plot_prices_old.sh \ "2013-12-16 to 2013-12-20 (15 min)" \ '(0.25/24.0)' '(+16.0)' 4 \ 340 1100 \ fix/2013-12-16--2013-12-20-BTCC-CNY-15m.txt "BTC-China" 
5.200 \ fix/2013-12-16--2013-12-20-BSTP-USD-15m.txt "Bitstamp" 0.926 \ fix/2013-12-16--2013-12-20-MGOX-USD-15m.txt "MtGOX" 1.000 \ > plots/2013-12-16--12-20-BTCC5200-BSTP-MGOX-15m.png plot_prices_old.sh \ "2013-12-16 to 2013-12-20 (15 min)" \ '(0.25/24.0)' '(+16.0)' 4 \ 340 1100 \ fix/2013-12-16--2013-12-20-BTCC-CNY-15m.txt "BTC-China" 5.000 \ fix/2013-12-16--2013-12-20-BSTP-USD-15m.txt "Bitstamp" 0.926 \ fix/2013-12-16--2013-12-20-MGOX-USD-15m.txt "MtGOX" 1.000 \ > plots/2013-12-16--2013-12-20-BTCC5000-BSTP-MGOX-15m.png plot_prices_old.sh \ "2014-01-05 to 2014-01-13 (hourly)" \ '(1.00/24.0)' '(+5.0)' 4 \ 720 1020 \ fix/2014-01-05--2014-01-13-BTCC-CNY-01h.txt "BTC-China" 6.100 \ fix/2014-01-05--2014-01-13-BSTP-USD-01h.txt "Bitstamp" 1.000 \ fix/2014-01-05--2014-01-13-MGOX-USD-01h.txt "MtGOX" 1.120 \ > plots/2014-01-05--2014-01-13-BTCC-BSTP-MGOX-01h.png plot_prices_old.sh \ "2014-01-18 to 2014-01-30 (hourly)" \ '(1/24.0)' '(+18.0)' 4 \ 360 1350 \ fix/2014-01-18--2014-01-30-MGOX-USD-01h.txt "MtGOX" 1.000 \ fix/2014-01-18--2014-01-30-BSTP-USD-01h.txt "Bitstamp" 1.000 \ > plots/foo.png plot_prices_old.sh \ "2013-09-01 to 2014-01-30 (hourly)" \ '(1/24.0)' '(+9.0)' 7.5 \ 40 1550 \ fix/2013-09-01--2014-01-30-MGOX-USD-01h.txt "MtGOX" 1.000 \ fix/2013-09-01--2014-01-30-BSTP-USD-01h.txt "Bitstamp" 1.000 \ > plots/foo.png plot_prices.sh \ "Mean daily prices 2010-07-17 to 2015-02-20" \ '180' 6 \ 2010-06-20 2015-03-10 \ 0.04 1600 \ "YES" \ fix/2010-07-17--2014-02-25-MGOX-USD-01d.txt 16 "MtGOX" 1.000 0022ff \ fix/2011-09-13--2015-02-20-BSTP-USD-01d.txt 16 "Bitstamp" 1.000 0066ff \ fix/2011-08-14--2015-02-20-BTCE-USD-01d.txt 16 "BTC-e" 1.000 008800 \ fix/2013-03-31--2015-02-20-BFNX-USD-01d.txt 16 "Bitfinex" 1.000 8800dd \ fix/2011-06-13--2015-02-20-BTCC-CNY-01d.txt 16 "BTC-China" 6.100 ff0000 \ fix/2013-06-12--2015-01-23-OKCO-CNY-01d.txt 16 "OKCoin.cn" 6.100 dd4400 \ > plots/all-data.png # The official USD to CNY factor as of 2013-12-17 was 6.073. 
The factors above were used to get the # plots as close as possible. PLOTTING PRICE RATIOS plot_two_prices_ratio.sh \ "2013-09-01 to 2014-01-30 (hourly)" \ '(1/24.0/(365.25/12))' '(+9.0)' 7.5 \ 2013-09-01 00:00:00 2014-02-24 23:00:00 \ 0.80 1.50 \ fix/2012-10-31-2300--2014-10-17-0000-BSTP-USD-01h.txt "MtGOX" 1.000 \ fix/2013-08-31-2300--2014-02-25-0100-MGOX-USD-01h.txt "Bitstamp" 1.000 \ > plots/2013-09-01--2014-01-30-MGOX-BSTP-ratio.png REFERENCE PRICE See directory "ref-price" and update it as needed. rundate=2015-02-10 lodate=2010-07-17 hidate=2015-02-20 ifile=../../ref-price/out/${rundate}-refprice-01d.txt ofile=${lodate}--${hidate}-PREF-USD-01d.txt ( cd fix && rm -f ${ofile} && cp -av ../${ifile} ${ofile} ) chmod a-w fix/${ofile} CREATING A TEST FILE Creating a daily series with exponential growth with gaps: make_test_series_file.gawk \ fix/2010-07-17--2014-02-25-MGOX-USD-01d.txt \ > fix/2010-07-17--2014-02-25-TEST-USD-01d.txt