diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml new file mode 100644 index 0000000..f5f990a --- /dev/null +++ b/.github/workflows/ruby.yml @@ -0,0 +1,53 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake +# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby + +name: Ruby + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + services: + # Label used to access the service container + postgres: + # Docker Hub image + image: postgres + # Provide the password for postgres + env: + POSTGRES_PASSWORD: postgres + ports: + - 5432:5432 + # Set health checks to wait until postgres has started + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + runs-on: ubuntu-latest + name: test (ruby v${{ matrix.ruby }}) + strategy: + matrix: + ruby: ["3.2", "3.3", "3.4"] + + steps: + - uses: actions/checkout@v2 + - name: Set up Ruby + # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby, + # change this to (see https://github.com/ruby/setup-ruby#versioning): + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - name: Install dependencies + run: bundle install + - name: Run tests + run: bundle exec rake diff --git a/.gitignore b/.gitignore index c1e0daf..ce3cd4a 100644 --- a/.gitignore +++ b/.gitignore @@ -13,9 +13,17 @@ tmtags ## VIM *.swp +## RubyMine +.idea + ## PROJECT::GENERAL coverage rdoc pkg ## PROJECT::SPECIFIC +.project + +# RVM files +.ruby-version +.ruby-gemset diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f86adb8..0000000 --- a/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ -language: 
ruby -rvm: - - 1.9.3 - -branches: - only: - - master diff --git a/Gemfile.lock b/Gemfile.lock index ac07f2e..7eff25c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,103 +1,66 @@ PATH remote: . specs: - postgres-copy (0.6.0) - activerecord (>= 3.0.0) - pg - rails (>= 3.0.0) - responders + postgres-copy (1.7.2) + activerecord (>= 5.1) + csv + pg (>= 0.17) GEM remote: https://rubygems.org/ specs: - actionmailer (3.2.13) - actionpack (= 3.2.13) - mail (~> 2.5.3) - actionpack (3.2.13) - activemodel (= 3.2.13) - activesupport (= 3.2.13) - builder (~> 3.0.0) - erubis (~> 2.7.0) - journey (~> 1.0.4) - rack (~> 1.4.5) - rack-cache (~> 1.2) - rack-test (~> 0.6.1) - sprockets (~> 2.2.1) - activemodel (3.2.13) - activesupport (= 3.2.13) - builder (~> 3.0.0) - activerecord (3.2.13) - activemodel (= 3.2.13) - activesupport (= 3.2.13) - arel (~> 3.0.2) - tzinfo (~> 0.3.29) - activeresource (3.2.13) - activemodel (= 3.2.13) - activesupport (= 3.2.13) - activesupport (3.2.13) - i18n (= 0.6.1) - multi_json (~> 1.0) - arel (3.0.2) - builder (3.0.4) - diff-lcs (1.1.3) - erubis (2.7.0) - hike (1.2.2) - i18n (0.6.1) - journey (1.0.4) - json (1.7.6) - mail (2.5.4) - mime-types (~> 1.16) - treetop (~> 1.4.8) - mime-types (1.23) - multi_json (1.7.3) - pg (0.15.1) - polyglot (0.3.3) - rack (1.4.5) - rack-cache (1.2) - rack (>= 0.4) - rack-ssl (1.3.3) - rack - rack-test (0.6.2) - rack (>= 1.0) - rails (3.2.13) - actionmailer (= 3.2.13) - actionpack (= 3.2.13) - activerecord (= 3.2.13) - activeresource (= 3.2.13) - activesupport (= 3.2.13) - bundler (~> 1.0) - railties (= 3.2.13) - railties (3.2.13) - actionpack (= 3.2.13) - activesupport (= 3.2.13) - rack-ssl (~> 1.3.2) - rake (>= 0.8.7) - rdoc (~> 3.4) - thor (>= 0.14.6, < 2.0) - rake (10.0.4) - rdoc (3.12) - json (~> 1.4) - responders (0.9.3) - railties (~> 3.1) - rspec (2.12.0) - rspec-core (~> 2.12.0) - rspec-expectations (~> 2.12.0) - rspec-mocks (~> 2.12.0) - rspec-core (2.12.2) - rspec-expectations (2.12.1) - diff-lcs (~> 1.1.3) - 
rspec-mocks (2.12.2) - sprockets (2.2.2) - hike (~> 1.2) - multi_json (~> 1.0) - rack (~> 1.0) - tilt (~> 1.1, != 1.3.0) - thor (0.18.1) - tilt (1.4.1) - treetop (1.4.12) - polyglot - polyglot (>= 0.3.1) - tzinfo (0.3.37) + activemodel (7.2.1) + activesupport (= 7.2.1) + activerecord (7.2.1) + activemodel (= 7.2.1) + activesupport (= 7.2.1) + timeout (>= 0.4.0) + activesupport (7.2.1) + base64 + bigdecimal + concurrent-ruby (~> 1.0, >= 1.3.1) + connection_pool (>= 2.2.5) + drb + i18n (>= 1.6, < 2) + logger (>= 1.4.2) + minitest (>= 5.1) + securerandom (>= 0.3) + tzinfo (~> 2.0, >= 2.0.5) + base64 (0.2.0) + bigdecimal (3.1.8) + concurrent-ruby (1.3.4) + connection_pool (2.4.1) + csv (3.3.0) + diff-lcs (1.5.1) + drb (2.2.1) + i18n (1.14.5) + concurrent-ruby (~> 1.0) + logger (1.6.1) + minitest (5.25.1) + pg (1.5.7) + psych (5.1.2) + stringio + rake (12.3.3) + rdoc (6.7.0) + psych (>= 4.0.0) + rspec (3.13.0) + rspec-core (~> 3.13.0) + rspec-expectations (~> 3.13.0) + rspec-mocks (~> 3.13.0) + rspec-core (3.13.0) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.1) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-support (3.13.1) + securerandom (0.3.1) + stringio (3.1.1) + timeout (0.4.1) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) PLATFORMS ruby @@ -105,5 +68,9 @@ PLATFORMS DEPENDENCIES bundler postgres-copy! + rake (~> 12.3.3) rdoc - rspec (~> 2.12) + rspec (~> 3.0) + +BUNDLED WITH + 2.2.22 diff --git a/README.md b/README.md index 418b9b9..f94122a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ -# postgres-copy [![Build Status](https://travis-ci.org/diogob/postgres-copy.png?branch=master)](https://travis-ci.org/diogob/postgres-copy) +# postgres-copy + +![Ruby](https://github.com/diogob/postgres-copy/workflows/Ruby/badge.svg) This Gem will enable your AR models to use the PostgreSQL COPY command to import/export data in CSV format. 
If you need to tranfer data between a PostgreSQL database and CSV files, the PostgreSQL native CSV parser will give you a greater performance than using the ruby CSV+INSERT commands. I have not found time to make accurate benchmarks, but in the use scenario where I have developed the gem I have had a four-fold performance gain. -This gem was written having the Rails framework in mind, I think it could work only with active-record, +This gem was written having the Rails framework in mind, I think it could work only with active-record, but I will assume in this README that you are using Rails. ## Install @@ -18,22 +20,35 @@ Run the bundle command bundle +## IMPORTANT note about recent versions + +* Rails 4 users should use version 0.7 or later; if you use Rails 3.2, stick with the 0.6 versions. +* Since version 0.8 all methods lost the prefix pg_ and they should be included in models through acts_as_copy_target. + ## Usage -The gem will add two aditiontal class methods to ActiveRecord::Base: +To enable the copy commands in an ActiveRecord model called User you should use: +```ruby +class User < ActiveRecord::Base + acts_as_copy_target +end +``` -* pg_copy_to -* pg_copy_to_string -* pg_copy_from +This will add the additional class methods to your model: -### Using pg_copy_to and pg_copy_to_string +* copy_to +* copy_to_string +* copy_to_enumerator +* copy_from + +### Using copy_to and copy_to_string You can go to the rails console and try some cool things first. -The first and most basic use case, let's copy the enteire content of a database table to a CSV file on the database server disk. +The first and most basic use case, let's copy the entire content of a database table to a CSV file on the database server disk. 
Assuming we have a users table and a User AR model: ```ruby -User.pg_copy_to '/tmp/users.csv' +User.copy_to '/tmp/users.csv' ``` This will execute in the database the command: @@ -42,30 +57,40 @@ This will execute in the database the command: COPY (SELECT "users".* FROM "users" ) TO '/tmp/users.csv' WITH DELIMITER ',' CSV HEADER ``` -Remark that the file will be created in the database server disk. -But what if you want to write the lines in a file on the server that is running Rails, instead of the database? +Remark that the file will be created in the database server disk. +But what if you want to write the lines in a file on the server that is running Rails, instead of the database? In this case you can pass a block and retrieve the generated lines and then write them to a file: ```ruby File.open('/tmp/users.csv', 'w') do |f| - User.pg_copy_to do |line| + User.copy_to do |line| f.write line end end ``` -Or, if you have enough memory, you can read all table contents to a string using .pg_copy_to_string +Instead of yielding each line, you could return an enumerator with all users: +```ruby +enumerator = User.copy_to_enumerator +``` + +And for better performance when rendering the result of the enumerator, you can return an enumerator with blocks of 100 lines joined: +```ruby +enumerator = User.copy_to_enumerator(:buffer_lines => 100) +``` + +Or, if you have enough memory, you can read all table contents to a string using .copy_to_string ```ruby -puts User.pg_copy_to_string +puts User.copy_to_string ``` -Another insteresting feature of pg_copy_to is that it uses the scoped relation, it means that you can use ARel +Another interesting feature of copy_to is that it uses the scoped relation, it means that you can use ARel operations to generate different CSV files according to your needs. 
Assuming we want to generate a file only with the names of users 1, 2 and 3: ```ruby -User.select("name").where(:id => [1,2,3]).pg_copy_to "/tmp/users.csv" +User.select("name").where(:id => [1,2,3]).copy_to "/tmp/users.csv" ``` Which will generate the following SQL command: @@ -74,10 +99,22 @@ Which will generate the following SQL command: COPY (SELECT name FROM "users" WHERE "users"."id" IN (1, 2, 3)) TO '/tmp/users.csv' WITH DELIMITER ',' CSV HEADER ``` +Alternatively, you can supply a customized raw SQL query to copy_to instead of using the scoped relation: + +```ruby +User.copy_to("/tmp/users.csv", query: 'SELECT count(*) as Total FROM users') +``` + +Which will generate the following SQL command: + +```sql +COPY (SELECT count(*) as Total FROM users) TO '/tmp/users.csv' WITH DELIMITER ',' CSV HEADER +``` + The COPY command also supports exporting the data in binary format. ```ruby -User.select("name").where(:id => [1,2,3]).pg_copy_to "/tmp/users.dat", :format => :binary +User.select("name").where(:id => [1,2,3]).copy_to "/tmp/users.dat", :format => :binary ``` Which will generate the following SQL command: @@ -89,46 +126,64 @@ COPY (SELECT name FROM "users" WHERE "users"."id" IN (1, 2, 3)) TO '/tmp/users.d The copy_to_string method also supports this ```ruby -puts User.pg_copy_to_string(:format => :binary) +puts User.copy_to_string(:format => :binary) ``` -### Using pg_copy_from +### Using copy_from -Now, if you want to copy data from a CSV file into the database, you can use the pg_copy_from method. +Now, if you want to copy data from a CSV file into the database, you can use the copy_from method. It will allow you to copy data from an arbritary IO object or from a file in the database server (when you pass the path as string). 
Let's first copy from a file in the database server, assuming again that we have a users table and that we are in the Rails console: ```ruby -User.pg_copy_from "/tmp/users.csv" +User.copy_from "/tmp/users.csv" ``` This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import. If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter. ```ruby -User.pg_copy_from "/tmp/users.csv", :map => {'name' => 'first_name'} +User.copy_from "/tmp/users.csv", :map => {'name' => 'first_name'} ``` In the above example the header name in the CSV file will be mapped to the field called first_name in the users table. You can also manipulate and modify the values of the file being imported before they enter into the database using a block: ```ruby -User.pg_copy_from "/tmp/users.csv" do |row| +User.copy_from "/tmp/users.csv" do |row| row[0] = "fixed string" end ``` -The above extample will always change the value of the first column to "fixed string" before storing it into the database. +The above example will always change the value of the first column to "fixed string" before storing it into the database. For each iteration of the block row receives an array with the same order as the columns in the CSV file. +To specify NULL value you can pass the null option parameter. + +```ruby +User.copy_from "/tmp/users.csv", :null => 'null' +``` + +Match the specified columns' values against the null string, even if it has been quoted, and if a match is found set the value to NULL (Postgres 9.4+ only). 
+ +```ruby +User.copy_from "/tmp/users.csv", :null => '', :force_null => [:name, :city] +``` + +To copy from a tsv file, you can set the format to `:tsv` + +```ruby +User.copy_from "/tmp/users.tsv", :format => :tsv +``` + To copy a binary formatted data file or IO object you can specify the format as binary ```ruby -User.pg_copy_from "/tmp/users.dat", :format => :binary +User.copy_from "/tmp/users.dat", :format => :binary ``` NOTE: Columns must line up with the table unless you specify how they map to table columns. @@ -136,7 +191,7 @@ NOTE: Columns must line up with the table unless you specify how they map to tab To specify how the columns will map to the table you can specify the :columns option ```ruby -User.pg_copy_from "/tmp/users.dat", :format => :binary, :columns => [:id, :name] +User.copy_from "/tmp/users.dat", :format => :binary, :columns => [:id, :name] ``` Which will generate the following SQL command: @@ -145,6 +200,26 @@ Which will generate the following SQL command: COPY users (id, name) FROM '/tmp/users.dat' WITH BINARY ``` +To specify the encoding with which to read the file, set the :encoding option. +This is useful for removing byte order marks when matching column headers. + +```ruby +User.copy_from "/tmp/users_with_byte_order_mark.csv", :encoding => 'bom|utf-8' +``` + +### Using PostgresCopy::WithTempTable.generate + +Based on [nfedyashev](https://github.com/nfedyashev)'s [comment](https://github.com/diogob/postgres-copy/issues/51): + +```ruby + PostgresCopy::WithTempTable.generate do |t| + columns.each do |column_name| + t.string column_name.to_sym + end + end +``` + +This auto-generates an id column, but the temp table creation is configurable. ### Using the CSV Responder If you want to make the result of a COPY command available to download this gem provides a CSV responder that, in conjunction with [inherited_resources](https://github.com/josevalim/inherited_resources), is a very powerfull tool. 
BTW, do not try to use the responder without inherited_resources. diff --git a/VERSION b/VERSION deleted file mode 100644 index ad83b1b..0000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.5.6 \ No newline at end of file diff --git a/lib/postgres-copy.rb b/lib/postgres-copy.rb index bb9dd70..5eaf533 100644 --- a/lib/postgres-copy.rb +++ b/lib/postgres-copy.rb @@ -1,16 +1,10 @@ require 'rubygems' -require 'active_record' -require 'postgres-copy/active_record' -require 'rails' +require 'active_support' -class PostgresCopy < Rails::Railtie +ActiveSupport.on_load :active_record do + require "postgres-copy/acts_as_copy_target" +end - initializer 'postgres-copy' do - ActiveSupport.on_load :active_record do - require "postgres-copy/active_record" - end - ActiveSupport.on_load :action_controller do - require "postgres-copy/csv_responder" - end - end +ActiveSupport.on_load :action_controller do + require "postgres-copy/csv_responder" end diff --git a/lib/postgres-copy/active_record.rb b/lib/postgres-copy/active_record.rb deleted file mode 100644 index 142beb6..0000000 --- a/lib/postgres-copy/active_record.rb +++ /dev/null @@ -1,88 +0,0 @@ -module ActiveRecord - class Base - # Copy data to a file passed as a string (the file path) or to lines that are passed to a block - def self.pg_copy_to path = nil, options = {} - options = {:delimiter => ",", :format => :csv, :header => true}.merge(options) - options_string = if options[:format] == :binary - "BINARY" - else - "DELIMITER '#{options[:delimiter]}' CSV #{options[:header] ? 'HEADER' : ''}" - end - - if path - raise "You have to choose between exporting to a file or receiving the lines inside a block" if block_given? - connection.execute "COPY (#{self.scoped.to_sql}) TO #{sanitize(path)} WITH #{options_string}" - else - connection.execute "COPY (#{self.scoped.to_sql}) TO STDOUT WITH #{options_string}" - while line = connection.raw_connection.get_copy_data do - yield(line) if block_given? 
- end - end - return self - end - - # Copy all data to a single string - def self.pg_copy_to_string options = {} - data = '' - self.pg_copy_to(nil, options){|l| data << l } - if options[:format] == :binary - data.force_encoding("ASCII-8BIT") - end - data - end - - # Copy data from a CSV that can be passed as a string (the file path) or as an IO object. - # * You can change the default delimiter passing delimiter: '' in the options hash - # * You can map fields from the file to different fields in the table using a map in the options hash - # * For further details on usage take a look at the README.md - def self.pg_copy_from path_or_io, options = {} - options = {:delimiter => ",", :format => :csv, :header => true}.merge(options) - options_string = if options[:format] == :binary - "BINARY" - else - "DELIMITER '#{options[:delimiter]}' CSV" - end - io = path_or_io.instance_of?(String) ? File.open(path_or_io, 'r') : path_or_io - - if options[:format] == :binary - columns_list = options[:columns] || [] - elsif options[:header] - line = io.gets - columns_list = options[:columns] || line.strip.split(options[:delimiter]) - else - columns_list = options[:columns] - end - - table = if options[:table] - connection.quote_table_name(options[:table]) - else - quoted_table_name - end - - columns_list = columns_list.map{|c| options[:map][c.to_s] } if options[:map] - columns_string = columns_list.size > 0 ? "(\"#{columns_list.join('","')}\")" : "" - connection.execute %{COPY #{table} #{columns_string} FROM STDIN #{options_string}} - if options[:format] == :binary - bytes = 0 - begin - while line = io.readpartial(10240) - connection.raw_connection.put_copy_data line - bytes += line.bytesize - end - rescue EOFError - end - else - while line = io.gets do - next if line.strip.size == 0 - if block_given? 
- row = line.strip.split(options[:delimiter]) - yield(row) - line = row.join(options[:delimiter]) + "\n" - end - connection.raw_connection.put_copy_data line - end - end - connection.raw_connection.put_copy_end - end - end -end diff --git a/lib/postgres-copy/acts_as_copy_target.rb b/lib/postgres-copy/acts_as_copy_target.rb new file mode 100644 index 0000000..64e2e68 --- /dev/null +++ b/lib/postgres-copy/acts_as_copy_target.rb @@ -0,0 +1,163 @@ +require 'csv' + +def get_file_mode mode, encoding = nil + if encoding + "#{mode}:#{encoding}" + else + mode + end +end + +module PostgresCopy + module ActsAsCopyTarget + extend ActiveSupport::Concern + + included do + end + + module CopyMethods + # Copy data to a file passed as a string (the file path) or to lines that are passed to a block + def copy_to path = nil, options = {} + options = { delimiter: ",", format: :csv, header: true }.merge(options) + options_string = if options[:format] == :binary + "BINARY" + else + "DELIMITER '#{options[:delimiter]}' CSV #{options[:header] ? 'HEADER' : ''}" + end + options_query = options.delete(:query) || self.all.to_sql + + if path + raise "You have to choose between exporting to a file or receiving the lines inside a block" if block_given? + connection.execute "COPY (#{options_query}) TO '#{sanitize_sql(path)}' WITH #{options_string}" + else + connection.raw_connection.copy_data "COPY (#{options_query}) TO STDOUT WITH #{options_string}" do + while line = connection.raw_connection.get_copy_data do + yield(line) if block_given? + end + end + end + return self + end + + # Create an enumerator with each line from the CSV. + # Note that using this directly in a controller response + # will perform very poorly as each line will get put + # into its own chunk. Joining every (eg) 100 rows together + # is much, much faster. 
+ def copy_to_enumerator(options={}) + buffer_lines = options.delete(:buffer_lines) + # Somehow, self loses its scope once inside the Enumerator + scope = self.current_scope || self + result = Enumerator.new do |y| + scope.copy_to(nil, options) do |line| + y << line + end + end + + if buffer_lines.to_i > 0 + Enumerator.new do |y| + result.each_slice(buffer_lines.to_i) do |slice| + y << slice.join + end + end + else + result + end + end + + # Copy all data to a single string + def copy_to_string options = {} + data = '' + self.copy_to(nil, options){|l| data << l } + if options[:format] == :binary + data.force_encoding("ASCII-8BIT") + end + data + end + + # Copy data from a CSV that can be passed as a string (the file path) or as an IO object. + # * You can change the default delimiter passing delimiter: '' in the options hash + # * You can map fields from the file to different fields in the table using a map in the options hash + # * For further details on usage take a look at the README.md + def copy_from path_or_io, options = {} + options = { delimiter: ",", format: :csv, header: true, quote: '"' }.merge(options) + options[:delimiter] = "\t" if options[:format] == :tsv + options_string = if options[:format] == :binary + "BINARY" + else + quote = options[:quote] == "'" ? "''" : options[:quote] + null = options.key?(:null) ? "NULL '#{options[:null]}'" : nil + force_null = options.key?(:force_null) ? "FORCE_NULL(#{options[:force_null].join(',')})" : nil + delimiter = options[:format] == :tsv ? "E'\t'" : "'#{options[:delimiter]}'" + "WITH (" + ["DELIMITER #{delimiter}", "QUOTE '#{quote}'", null, force_null, "FORMAT CSV"].compact.join(', ') + ")" + end + io = path_or_io.instance_of?(String) ? 
File.open(path_or_io, get_file_mode('r', options[:encoding])) : path_or_io + + if options[:format] == :binary + columns_list = options[:columns] || [] + elsif options[:header] + line = io.gets + columns_list = options[:columns] || line.strip.split(options[:delimiter]) + else + columns_list = options[:columns] + end + + table = if options[:table] + connection.quote_table_name(options[:table]) + else + quoted_table_name + end + + columns_list = columns_list.map{|c| options[:map][c.to_s] || c.to_s } if options[:map] + columns_string = columns_list.size > 0 ? "(\"#{columns_list.join('","')}\")" : "" + connection.raw_connection.copy_data %{COPY #{table} #{columns_string} FROM STDIN #{options_string}} do + if options[:format] == :binary + bytes = 0 + begin + while line = io.readpartial(10240) + connection.raw_connection.put_copy_data line + bytes += line.bytesize + end + rescue EOFError + end + else + line_buffer = '' + + while line = io.gets do + next if line.strip.size == 0 + + line_buffer += line + + # If line is incomplete, get the next line until it terminates + if line_buffer =~ /\n$/ || line_buffer =~ /\Z/ + if block_given? + begin + row = CSV.parse_line(line_buffer.strip, col_sep: options[:delimiter]) + yield(row) + next if row.all?(&:nil?) 
+ line_buffer = CSV.generate_line(row, col_sep: options[:delimiter]) + rescue CSV::MalformedCSVError + next + end + end + + connection.raw_connection.put_copy_data(line_buffer) + + # Clear the buffer + line_buffer = '' + end + end + end + end + end + end + + module ClassMethods + def acts_as_copy_target + extend PostgresCopy::ActsAsCopyTarget::CopyMethods + end + end + end +end + +ActiveRecord::Base.send :include, PostgresCopy::ActsAsCopyTarget diff --git a/lib/postgres-copy/csv_responder.rb b/lib/postgres-copy/csv_responder.rb index a3e0321..36a856c 100644 --- a/lib/postgres-copy/csv_responder.rb +++ b/lib/postgres-copy/csv_responder.rb @@ -2,7 +2,7 @@ module Responders module CsvResponder def to_csv controller.response_body = Enumerator.new do |y| - controller.send(:end_of_association_chain).pg_copy_to do |line| + controller.send(:end_of_association_chain).copy_to do |line| y << line end end diff --git a/lib/postgres-copy/with_temp_table.rb b/lib/postgres-copy/with_temp_table.rb new file mode 100644 index 0000000..c571294 --- /dev/null +++ b/lib/postgres-copy/with_temp_table.rb @@ -0,0 +1,18 @@ +module PostgresCopy + module WithTempTable + def self.generate(connection = ActiveRecord::Base.connection, base_klass: + ActiveRecord::Base, temp_table_name: nil, create_table_opts: {id: :bigint}) + raise "You have to pass a table schema definition block!" unless block_given? 
+ table_name = temp_table_name || "temp_table_#{SecureRandom.hex}" + + connection.create_table table_name, temporary: true, **create_table_opts do |t| + yield t + end + + klass = Class.new(base_klass) do + acts_as_copy_target + self.table_name = table_name + end + end + end +end diff --git a/postgres-copy.gemspec b/postgres-copy.gemspec index f5aa6d2..08b320e 100644 --- a/postgres-copy.gemspec +++ b/postgres-copy.gemspec @@ -4,30 +4,28 @@ $:.unshift lib unless $:.include?(lib) Gem::Specification.new do |s| - s.name = "postgres-copy" - s.version = "0.6.0" + s.name = "postgres-copy" + s.version = "1.7.2" + s.platform = Gem::Platform::RUBY + s.required_ruby_version = ">= 1.9.3" + s.authors = ["Diogo Biazus"] + s.description = "Now you can use the super fast COPY for import/export data directly from your AR models." + s.email = "diogob@gmail.com" + git_files = `git ls-files`.split("\n") rescue '' + s.files = git_files + s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") + s.executables = [] + s.require_paths = %w(lib) + s.homepage = "http://github.com/diogob/postgres-copy" + s.require_paths = ["lib"] + s.summary = "Put COPY command functionality in ActiveRecord's model class" - s.platform = Gem::Platform::RUBY - s.required_ruby_version = ">= 1.8.7" - s.authors = ["Diogo Biazus"] - s.date = "2013-01-31" - s.description = "Now you can use the super fast COPY for import/export data directly from your AR models." 
- s.email = "diogob@gmail.com" - git_files = `git ls-files`.split("\n") rescue '' - s.files = git_files - s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") - s.executables = [] - s.require_paths = %w(lib) - s.homepage = "http://github.com/diogob/postgres-copy" - s.require_paths = ["lib"] - s.summary = "Put COPY command functionality in ActiveRecord's model class" - - s.add_dependency "pg" - s.add_dependency "activerecord", '>= 3.0.0' - s.add_dependency "rails", '>= 3.0.0' - s.add_dependency "responders" + s.add_dependency "pg", ">= 0.17" + s.add_dependency "activerecord", '>= 5.1' + s.add_dependency "csv" s.add_development_dependency "bundler" s.add_development_dependency "rdoc" - s.add_development_dependency "rspec", "~> 2.12" + s.add_development_dependency "rspec", "~> 3.0" + s.add_development_dependency "rake", "~> 12.3.3" end diff --git a/spec/copy_from_binary_spec.rb b/spec/copy_from_binary_spec.rb new file mode 100644 index 0000000..d17bc65 --- /dev/null +++ b/spec/copy_from_binary_spec.rb @@ -0,0 +1,22 @@ +require File.expand_path(File.dirname(__FILE__) + '/spec_helper') + +describe "COPY FROM BINARY" do + before(:each) do + ActiveRecord::Base.connection.execute %{ + TRUNCATE TABLE test_models; + SELECT setval('test_models_id_seq', 1, false); +} + end + + it "should import from file if path is passed without field_map" do + TestModel.copy_from File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data] + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'text'}] + end + + it "should import from file if columns are not specified" do + TestModel.copy_from File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'text'}] + end + +end + diff --git a/spec/copy_from_spec.rb b/spec/copy_from_spec.rb new file mode 100644 index 0000000..c3c357e --- /dev/null +++ b/spec/copy_from_spec.rb @@ 
-0,0 +1,188 @@ +require File.expand_path(File.dirname(__FILE__) + '/spec_helper') + +describe "COPY FROM" do + before(:each) do + ActiveRecord::Base.connection.execute %{ + TRUNCATE TABLE test_models; + TRUNCATE TABLE test_extended_models; + SELECT setval('test_models_id_seq', 1, false); + } + end + + it "should import from file if path is passed without field_map" do + TestModel.copy_from File.expand_path('spec/fixtures/comma_with_header.csv') + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should import from file if path is with a field_map" do + TestModel.copy_from File.expand_path('spec/fixtures/comma_with_header_to_map.csv'), map: {'ref' => 'id'} + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + + it "should import from IO without field_map" do + TestModel.copy_from File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r') + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should import with custom delimiter from path" do + TestModel.copy_from File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';' + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should import with custom delimiter from IO" do + TestModel.copy_from File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';' + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should import and allow changes in block" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row| + row[1] = 'changed this data' + end + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'changed this data'}] + end + + it "should import 2 lines and allow changes in block" do + 
TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row| + row[1] = 'changed this data' + end + TestModel.order(:id).first.attributes.should == {'id' => 1, 'data' => 'changed this data'} + TestModel.count.should == 2 + end + + it "should be able to copy from using custom set of columns" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["data"]) + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "default set of columns should be all table columns minus [id, created_at, updated_at]" do + ExtraField.copy_from(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) + ExtraField.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1', 'created_at' => nil, 'updated_at' => nil}] + end + + it "should not expect a header when :header is false" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data]) + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should use the table name given by :table" do + ExtraField.copy_from(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data], :table => "test_models") + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should be able to map the header in the file to diferent column names" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'}) + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should be able to map the header in the file to diferent column names with custom delimiter" 
do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'}) + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should ignore empty lines" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t") + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should ignore all-nil rows" do + lambda do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_error.csv'), 'r'), :delimiter => "\t") do |row| + 0.upto(row.length) {|idx| row[idx] = nil} + end + end.should_not raise_error + TestModel.order(:id).map{|r| r.attributes}.should == [] + end + + #we should implement this later + #it "should raise error in malformed files" do + #lambda do + #TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_error.csv'), 'r')) + #end.should raise_error + #TestModel.order(:id).map{|r| r.attributes}.should == [] + #end + + it "should copy from even when table fields need identifier quoting" do + ReservedWordModel.copy_from File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t" + ReservedWordModel.order(:id).map{|r| r.attributes}.should == [{"group"=>"group name", "id"=>1, "select"=>"test select"}] + end + + it "should import even last columns have empty values" do + TestExtendedModel.copy_from File.expand_path('spec/fixtures/comma_with_header_empty_values_at_the_end.csv') + TestExtendedModel.order(:id).map{|r| r.attributes}.should == + [{"id"=>1, "data"=>"test data 1", "more_data"=>nil, "other_data"=>nil, "final_data"=>nil}, + {"id"=>2, "data"=>"test data 2", "more_data"=>"9", "other_data"=>nil, "final_data"=>nil}, + {"id"=>3, "data"=>"test data 2", "more_data"=>"9", "other_data"=>nil, "final_data"=>"0"}] + end + + it "should import even last columns have 
empty values with block" do + TestExtendedModel.copy_from File.expand_path('spec/fixtures/comma_with_header_empty_values_at_the_end.csv') do |row| + row[4]="666" + end + TestExtendedModel.order(:id).map{|r| r.attributes}.should == + [{"id"=>1, "data"=>"test data 1", "more_data"=>nil, "other_data"=>nil, "final_data"=>"666"}, + {"id"=>2, "data"=>"test data 2", "more_data"=>"9", "other_data"=>nil, "final_data"=>"666"}, + {"id"=>3, "data"=>"test data 2", "more_data"=>"9", "other_data"=>nil, "final_data"=>"666"}] + end + + it "should import lines with single quotes" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/semicolon_with_quote.csv'), 'r'), :delimiter => ";", :quote => "'") + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test "data" 1'}] + end + + it "should import lines with commas inside fields with default options" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/comma_inside_field.csv'), 'r')) + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test, again'}] + end + + it "should import lines with commas inside fields with block given" do + File.open(File.expand_path('spec/fixtures/comma_inside_field.csv'), 'r') do |file| + TestModel.copy_from(file) do |row| + # since our CSV line look like this: {1,"test, again"} we expect only two elements withing row + row.size.should == 2 + row[0].should == '1' + row[1].should == 'test, again' + end + end + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test, again'}] + end + + it "should import with custom null expression from path" do + TestModel.copy_from File.expand_path('spec/fixtures/special_null_with_header.csv'), :null => 'NULL' + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => nil}] + end + + it "should import with custom null expression from IO" do + TestModel.copy_from File.open(File.expand_path('spec/fixtures/special_null_with_header.csv'), 'r'), :null => 
'NULL' + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => nil}] + end + + it "should import with a carriage return in the value" do + TestModel.copy_from File.expand_path('spec/fixtures/comma_with_carriage_returns.csv') + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => "test\ndata 1"}] + end + + it "should import custom force null expressions from path" do + TestModel.copy_from File.expand_path('spec/fixtures/comma_with_empty_string.csv'), :null => '', :force_null => [:data] + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => nil}] + end + + it "should import tsv from path" do + TestModel.copy_from File.expand_path('spec/fixtures/tab_with_header.tsv'), :format => :tsv + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + + it "should import 2 lines from tsv and allow changes in block" do + TestModel.copy_from(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.tsv'), 'r'), :format => :tsv) do |row| + row[1] = 'changed this data' + end + TestModel.order(:id).first.attributes.should == {'id' => 1, 'data' => 'changed this data'} + TestModel.count.should == 2 + end + + it "should import csv headers with BOM when provided encoding option" do + TestModel.copy_from File.expand_path("spec/fixtures/comma_with_bom.csv"), :encoding => "bom|utf-8" + TestModel.order(:id).map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] + end + +end diff --git a/spec/pg_copy_to_binary_spec.rb b/spec/copy_to_binary_spec.rb similarity index 55% rename from spec/pg_copy_to_binary_spec.rb rename to spec/copy_to_binary_spec.rb index 225d28f..cd1d058 100644 --- a/spec/pg_copy_to_binary_spec.rb +++ b/spec/copy_to_binary_spec.rb @@ -11,23 +11,12 @@ describe "should allow binary output to string" do context "with only binary option" do - subject{ TestModel.pg_copy_to_string(:format => :binary) } + subject{ TestModel.copy_to_string(:format => 
:binary) } it{ should == File.open('spec/fixtures/2_col_binary_data.dat', 'r:ASCII-8BIT').read } end context "with custom select" do - subject{ TestModel.select("id, data").pg_copy_to_string(:format => :binary) } + subject{ TestModel.select("id, data").copy_to_string(:format => :binary) } it{ should == File.open('spec/fixtures/2_col_binary_data.dat', 'r:ASCII-8BIT').read } end end - - describe "should allow binary output to file" do - it "should copy to disk if block is not given and a path is passed" do - TestModel.pg_copy_to '/tmp/export.dat', :format => :binary - str = File.open('/tmp/export.dat', 'r:ASCII-8BIT').read - - str.should == File.open('spec/fixtures/2_col_binary_data.dat', 'r:ASCII-8BIT').read - - end - end - end diff --git a/spec/copy_to_spec.rb b/spec/copy_to_spec.rb new file mode 100644 index 0000000..be8516f --- /dev/null +++ b/spec/copy_to_spec.rb @@ -0,0 +1,81 @@ +require File.expand_path(File.dirname(__FILE__) + '/spec_helper') + +describe "COPY TO" do + before(:each) do + ActiveRecord::Base.connection.execute %{ + TRUNCATE TABLE test_models; + SELECT setval('test_models_id_seq', 1, false); + } + TestModel.create :data => 'test data 1' + end + + describe ".copy_to_string" do + context "with no options" do + subject{ TestModel.copy_to_string } + it{ should == File.open('spec/fixtures/comma_with_header.csv', 'r').read } + end + + context "with tab as delimiter" do + subject{ TestModel.copy_to_string :delimiter => "\t" } + it{ should == File.open('spec/fixtures/tab_with_header.csv', 'r').read } + end + end + + describe ".copy_to_enumerator" do + before(:each) do + TestModel.create :data => 'test data 2' + TestModel.create :data => 'test data 3' + TestModel.create :data => 'test data 4' + end + + context "with no options" do + subject{ TestModel.copy_to_enumerator.to_a } + it{ should == File.open('spec/fixtures/comma_with_header_multi.csv', 'r').read.lines } + end + + context "with tab as delimiter" do + subject{ 
TestModel.copy_to_enumerator(:delimiter => "\t").to_a } + it{ should == File.open('spec/fixtures/tab_with_header_multi.csv', 'r').read.lines } + end + + context "with many records" do + context "enumerating in batches" do + subject{ TestModel.copy_to_enumerator(:buffer_lines => 2).to_a } + it do + expected = [] + File.open('spec/fixtures/comma_with_header_multi.csv', 'r').read.lines.each_slice(2){|s| expected << s.join } + should == expected + end + end + + context "excluding some records via a scope" do + subject{ TestModel.where("data not like '%3'").copy_to_enumerator.to_a } + it{ should == File.open('spec/fixtures/comma_with_header_and_scope.csv', 'r').read.lines } + end + end + end + + describe ".copy_to" do + it "should copy and pass data to block if block is given and no path is passed" do + File.open('spec/fixtures/comma_with_header.csv', 'r') do |f| + TestModel.copy_to do |row| + row.should == f.readline + end + end + end + + it "should raise exception if I pass a path and a block simultaneously" do + lambda do + TestModel.copy_to('/tmp/bogus_path') do |row| + end + end.should raise_error + end + + it "accepts custom sql query to run instead on the current relation" do + TestModel.copy_to(nil, query: 'SELECT count(*) as "Total" FROM test_models') do |row| + expect(row).to eq("Total\n") + break + end + end + end +end diff --git a/spec/fixtures/comma_inside_field.csv b/spec/fixtures/comma_inside_field.csv new file mode 100644 index 0000000..650a0e1 --- /dev/null +++ b/spec/fixtures/comma_inside_field.csv @@ -0,0 +1,2 @@ +id,data +1,"test, again" diff --git a/spec/fixtures/comma_with_bom.csv b/spec/fixtures/comma_with_bom.csv new file mode 100644 index 0000000..df635db --- /dev/null +++ b/spec/fixtures/comma_with_bom.csv @@ -0,0 +1,2 @@ +id,data +1,"test data 1" diff --git a/spec/fixtures/comma_with_carriage_returns.csv b/spec/fixtures/comma_with_carriage_returns.csv new file mode 100644 index 0000000..c76c15a --- /dev/null +++ 
b/spec/fixtures/comma_with_carriage_returns.csv @@ -0,0 +1,3 @@ +id,data +1,"test +data 1" \ No newline at end of file diff --git a/spec/fixtures/comma_with_empty_string.csv b/spec/fixtures/comma_with_empty_string.csv new file mode 100644 index 0000000..8e02be6 --- /dev/null +++ b/spec/fixtures/comma_with_empty_string.csv @@ -0,0 +1,2 @@ +id,data +1,"" diff --git a/spec/fixtures/comma_with_header_and_scope.csv b/spec/fixtures/comma_with_header_and_scope.csv new file mode 100644 index 0000000..563ccb0 --- /dev/null +++ b/spec/fixtures/comma_with_header_and_scope.csv @@ -0,0 +1,4 @@ +id,data +1,test data 1 +2,test data 2 +4,test data 4 diff --git a/spec/fixtures/comma_with_header_empty_values_at_the_end.csv b/spec/fixtures/comma_with_header_empty_values_at_the_end.csv new file mode 100644 index 0000000..b7f0bfc --- /dev/null +++ b/spec/fixtures/comma_with_header_empty_values_at_the_end.csv @@ -0,0 +1,4 @@ +id,data,more_data,other_data,final_data +1,test data 1,,, +2,test data 2,9,, +3,test data 2,9,,0 diff --git a/spec/fixtures/comma_with_header_multi.csv b/spec/fixtures/comma_with_header_multi.csv new file mode 100644 index 0000000..5b680f3 --- /dev/null +++ b/spec/fixtures/comma_with_header_multi.csv @@ -0,0 +1,5 @@ +id,data +1,test data 1 +2,test data 2 +3,test data 3 +4,test data 4 diff --git a/spec/fixtures/comma_with_header_to_map.csv b/spec/fixtures/comma_with_header_to_map.csv new file mode 100644 index 0000000..b85c91c --- /dev/null +++ b/spec/fixtures/comma_with_header_to_map.csv @@ -0,0 +1,2 @@ +ref,data +1,test data 1 diff --git a/spec/fixtures/extra_field.rb b/spec/fixtures/extra_field.rb index e297ab4..f9ce2fd 100644 --- a/spec/fixtures/extra_field.rb +++ b/spec/fixtures/extra_field.rb @@ -1,5 +1,6 @@ require 'postgres-copy' class ExtraField < ActiveRecord::Base + acts_as_copy_target end diff --git a/spec/fixtures/reserved_word_model.rb b/spec/fixtures/reserved_word_model.rb index 89a3992..d196dff 100644 --- a/spec/fixtures/reserved_word_model.rb +++ 
b/spec/fixtures/reserved_word_model.rb @@ -1,5 +1,6 @@ require 'postgres-copy' class ReservedWordModel < ActiveRecord::Base + acts_as_copy_target end diff --git a/spec/fixtures/semicolon_with_quote.csv b/spec/fixtures/semicolon_with_quote.csv new file mode 100644 index 0000000..bee48e5 --- /dev/null +++ b/spec/fixtures/semicolon_with_quote.csv @@ -0,0 +1,2 @@ +id;data +1;'test "data" 1' diff --git a/spec/fixtures/special_null_with_header.csv b/spec/fixtures/special_null_with_header.csv new file mode 100644 index 0000000..ab6d294 --- /dev/null +++ b/spec/fixtures/special_null_with_header.csv @@ -0,0 +1,2 @@ +id,data +1,NULL \ No newline at end of file diff --git a/spec/fixtures/tab_with_header.tsv b/spec/fixtures/tab_with_header.tsv new file mode 100644 index 0000000..d522d21 --- /dev/null +++ b/spec/fixtures/tab_with_header.tsv @@ -0,0 +1,2 @@ +id data +1 test data 1 diff --git a/spec/fixtures/tab_with_header_multi.csv b/spec/fixtures/tab_with_header_multi.csv new file mode 100644 index 0000000..ffe9af4 --- /dev/null +++ b/spec/fixtures/tab_with_header_multi.csv @@ -0,0 +1,5 @@ +id data +1 test data 1 +2 test data 2 +3 test data 3 +4 test data 4 diff --git a/spec/fixtures/tab_with_two_lines.tsv b/spec/fixtures/tab_with_two_lines.tsv new file mode 100644 index 0000000..aaac52c --- /dev/null +++ b/spec/fixtures/tab_with_two_lines.tsv @@ -0,0 +1,3 @@ +id data +1 test data 1 +2 test data 2 diff --git a/spec/fixtures/test_extended_model.rb b/spec/fixtures/test_extended_model.rb new file mode 100644 index 0000000..e5d218a --- /dev/null +++ b/spec/fixtures/test_extended_model.rb @@ -0,0 +1,5 @@ +require 'postgres-copy' + +class TestExtendedModel < ActiveRecord::Base + acts_as_copy_target +end diff --git a/spec/fixtures/test_model.rb b/spec/fixtures/test_model.rb index ef7d18b..a4b82fb 100644 --- a/spec/fixtures/test_model.rb +++ b/spec/fixtures/test_model.rb @@ -1,4 +1,5 @@ require 'postgres-copy' class TestModel < ActiveRecord::Base + acts_as_copy_target end diff --git 
a/spec/pg_copy_from_binary_spec.rb b/spec/pg_copy_from_binary_spec.rb deleted file mode 100644 index 9af2886..0000000 --- a/spec/pg_copy_from_binary_spec.rb +++ /dev/null @@ -1,22 +0,0 @@ -require File.expand_path(File.dirname(__FILE__) + '/spec_helper') - -describe "COPY FROM BINARY" do - before(:each) do - ActiveRecord::Base.connection.execute %{ - TRUNCATE TABLE test_models; - SELECT setval('test_models_id_seq', 1, false); -} - end - - it "should import from file if path is passed without field_map" do - TestModel.pg_copy_from File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data] - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'text'}] - end - - it "should import from file if columns are not specified" do - TestModel.pg_copy_from File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'text'}] - end - -end - diff --git a/spec/pg_copy_from_spec.rb b/spec/pg_copy_from_spec.rb deleted file mode 100644 index 949c0f9..0000000 --- a/spec/pg_copy_from_spec.rb +++ /dev/null @@ -1,94 +0,0 @@ -require File.expand_path(File.dirname(__FILE__) + '/spec_helper') - -describe "COPY FROM" do - before(:each) do - ActiveRecord::Base.connection.execute %{ - TRUNCATE TABLE test_models; - SELECT setval('test_models_id_seq', 1, false); - } - end - - it "should import from file if path is passed without field_map" do - TestModel.pg_copy_from File.expand_path('spec/fixtures/comma_with_header.csv') - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should import from IO without field_map" do - TestModel.pg_copy_from File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r') - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should import with custom delimiter from path" do - 
TestModel.pg_copy_from File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';' - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should import with custom delimiter from IO" do - TestModel.pg_copy_from File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';' - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should import and allow changes in block" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row| - row[1] = 'changed this data' - end - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'changed this data'}] - end - - it "should import 2 lines and allow changes in block" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row| - row[1] = 'changed this data' - end - TestModel.order(:id).first.attributes.should == {'id' => 1, 'data' => 'changed this data'} - TestModel.count.should == 2 - end - - it "should be able to copy from using custom set of columns" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["data"]) - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "default set of columns should be all table columns minus [id, created_at, updated_at]" do - ExtraField.pg_copy_from(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) - ExtraField.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1', 'created_at' => nil, 'updated_at' => nil}] - end - - it "should not expect a header when :header is false" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => 
[:id,:data]) - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should use the table name given by :table" do - ActiveRecord::Base.pg_copy_from(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data], :table => "test_models") - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should be able to map the header in the file to diferent column names" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'}) - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should be able to map the header in the file to diferent column names with custom delimiter" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'}) - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - it "should ignore empty lines" do - TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t") - TestModel.order(:id).all.map{|r| r.attributes}.should == [{'id' => 1, 'data' => 'test data 1'}] - end - - #we should implement this later - #it "should raise error in malformed files" do - #lambda do - #TestModel.pg_copy_from(File.open(File.expand_path('spec/fixtures/tab_with_error.csv'), 'r')) - #end.should raise_error - #TestModel.order(:id).all.map{|r| r.attributes}.should == [] - #end - - it "should copy from even when table fields need identifier quoting" do - ReservedWordModel.pg_copy_from File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t" - ReservedWordModel.order(:id).all.map{|r| r.attributes}.should == 
[{"group"=>"group name", "id"=>1, "select"=>"test select"}] - end -end - diff --git a/spec/pg_copy_to_spec.rb b/spec/pg_copy_to_spec.rb deleted file mode 100644 index 9715402..0000000 --- a/spec/pg_copy_to_spec.rb +++ /dev/null @@ -1,49 +0,0 @@ -require File.expand_path(File.dirname(__FILE__) + '/spec_helper') - -describe "COPY TO" do - before(:all) do - ActiveRecord::Base.connection.execute %{ - TRUNCATE TABLE test_models; - SELECT setval('test_models_id_seq', 1, false); -} - TestModel.create :data => 'test data 1' - end - - describe ".pg_copy_to_string" do - context "with no options" do - subject{ TestModel.pg_copy_to_string } - it{ should == File.open('spec/fixtures/comma_with_header.csv', 'r').read } - end - - context "with tab as delimiter" do - subject{ TestModel.pg_copy_to_string :delimiter => "\t" } - it{ should == File.open('spec/fixtures/tab_with_header.csv', 'r').read } - end - end - - describe ".pg_copy_to" do - it "should copy and pass data to block if block is given and no path is passed" do - File.open('spec/fixtures/comma_with_header.csv', 'r') do |f| - TestModel.pg_copy_to do |row| - row.should == f.readline - end - end - end - - it "should copy to disk if block is not given and a path is passed" do - TestModel.pg_copy_to '/tmp/export.csv' - File.open('spec/fixtures/comma_with_header.csv', 'r') do |fixture| - File.open('/tmp/export.csv', 'r') do |result| - result.read.should == fixture.read - end - end - end - - it "should raise exception if I pass a path and a block simultaneously" do - lambda do - TestModel.pg_copy_to('/tmp/bogus_path') do |row| - end - end.should raise_error - end - end -end diff --git a/spec/postgres-copy/with_temp_table_spec.rb b/spec/postgres-copy/with_temp_table_spec.rb new file mode 100644 index 0000000..e7cad09 --- /dev/null +++ b/spec/postgres-copy/with_temp_table_spec.rb @@ -0,0 +1,17 @@ +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') +require 'postgres-copy/with_temp_table' + +describe '.generate' 
do + subject(:generate) { + PostgresCopy::WithTempTable.generate do |t| + t.string :data + end + } + + it { + generate.copy_from 'spec/fixtures/comma_with_header.csv' + data = generate.all.first + expect(data.id).to eq(1) + expect(data.data).to eq('test data 1') + } +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index da8956f..d3e4a45 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,6 +1,8 @@ $LOAD_PATH.unshift(File.dirname(__FILE__)) $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) +require 'active_record' require 'fixtures/test_model' +require 'fixtures/test_extended_model' require 'fixtures/extra_field' require 'fixtures/reserved_word_model' require 'rspec' @@ -22,9 +24,11 @@ ActiveRecord::Base.connection.execute %{ SET client_min_messages TO warning; DROP TABLE IF EXISTS test_models; - DROP TABLE IF EXISTS extra_fields; + DROP TABLE IF EXISTS test_extended_models; DROP TABLE IF EXISTS reserved_word_models; + DROP TABLE IF EXISTS extra_fields; CREATE TABLE test_models (id serial PRIMARY KEY, data text); + CREATE TABLE test_extended_models (id serial PRIMARY KEY, data text, more_data text,other_data text,final_data text ); CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text); CREATE TABLE extra_fields (id serial PRIMARY KEY, data text, created_at timestamp, updated_at timestamp); }