Drizzled Public API Documentation

drizzleimport.cc
1 /*
2  Copyright (C) 2010 Vijay Samuel
3  Copyright (C) 2010 Brian Aker
4  Copyright (C) 2000-2006 MySQL AB
5  Copyright (C) 2008-2009 Sun Microsystems, Inc.
6 
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation; version 2 of the License.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
19 
20 #define IMPORT_VERSION "4.0"
21 
#include "client_priv.h"
#include <string>
#include <sstream>
#include <iostream>
#include <fstream>
#include <vector>
#include <boost/program_options.hpp>
#include <pthread.h>

#include <drizzled/definitions.h>
#include <drizzled/internal/my_sys.h>
/* Added this for string translation. */
#include <drizzled/gettext.h>
#include <drizzled/configmake.h>

#include "user_detect.h"
37 
38 namespace po= boost::program_options;
39 using namespace std;
40 using namespace drizzled;
41 
42 extern "C" void * worker_thread(void *arg);
43 
44 int exitcode= 0;
45 
46 const char *program_name= "drizzleimport";
47 
48 /* Global Thread counter */
49 uint32_t counter;
50 pthread_mutex_t counter_mutex;
51 pthread_cond_t count_threshhold;
52 
53 static void db_error(drizzle_con_st *con, drizzle_result_st *result,
54  drizzle_return_t ret, char *table);
55 static char *field_escape(char *to,const char *from,uint32_t length);
56 static char *add_load_option(char *ptr,const char *object,
57  const char *statement);
58 
59 static bool verbose= false, ignore_errors= false,
60  opt_delete= false, opt_replace= false, silent= false,
61  ignore_unique= false, opt_low_priority= false,
62  use_drizzle_protocol= false, opt_local_file;
63 
64 static uint32_t opt_use_threads;
65 static uint32_t opt_drizzle_port= 0;
66 static int64_t opt_ignore_lines= -1;
67 
68 std::string opt_columns,
69  opt_enclosed,
70  escaped,
71  password,
72  current_db,
73  lines_terminated,
74  current_user,
75  opt_password,
76  enclosed,
77  current_host,
78  fields_terminated,
79  opt_protocol;
80 
81 
82 static int get_options(void)
83 {
84 
85  if (! enclosed.empty() && ! opt_enclosed.empty())
86  {
87  fprintf(stderr, "You can't use ..enclosed.. and ..optionally-enclosed.. at the same time.\n");
88  return(1);
89  }
90  if (opt_replace && ignore_unique)
91  {
92  fprintf(stderr, "You can't use --ignore_unique (-i) and --replace (-r) at the same time.\n");
93  return(1);
94  }
95 
96  if (tty_password)
97  opt_password=client_get_tty_password(NULL);
98  return(0);
99 }
100 
101 
102 
103 static int write_to_table(char *filename, drizzle_con_st *con)
104 {
105  char tablename[FN_REFLEN], hard_path[FN_REFLEN],
106  sql_statement[FN_REFLEN*16+256], *end;
107  drizzle_result_st result;
108  drizzle_return_t ret;
109 
110  internal::fn_format(tablename, filename, "", "", 1 | 2); /* removes path & ext. */
111  if (not opt_local_file)
112  strcpy(hard_path,filename);
113  else
114  internal::my_load_path(hard_path, filename, NULL); /* filename includes the path */
115 
116  if (opt_delete)
117  {
118  if (verbose)
119  fprintf(stdout, "Deleting the old data from table %s\n", tablename);
120 #ifdef HAVE_SNPRINTF
121  snprintf(sql_statement, sizeof(sql_statement), "DELETE FROM %s", tablename);
122 #else
123  snprintf(sql_statement, sizeof(sql_statement), "DELETE FROM %s", tablename);
124 #endif
125  if (drizzle_query_str(con, &result, sql_statement, &ret) == NULL ||
126  ret != DRIZZLE_RETURN_OK)
127  {
128  db_error(con, &result, ret, tablename);
129  return(1);
130  }
131  drizzle_result_free(&result);
132  }
133  if (verbose)
134  {
135  if (opt_local_file)
136  fprintf(stdout, "Loading data from LOCAL file: %s into %s\n",
137  hard_path, tablename);
138  else
139  fprintf(stdout, "Loading data from SERVER file: %s into %s\n",
140  hard_path, tablename);
141  }
142  snprintf(sql_statement, sizeof(sql_statement), "LOAD DATA %s %s INFILE '%s'",
143  opt_low_priority ? "LOW_PRIORITY" : "",
144  opt_local_file ? "LOCAL" : "", hard_path);
145  end= strchr(sql_statement, '\0');
146  if (opt_replace)
147  end= strcpy(end, " REPLACE")+8;
148  if (ignore_unique)
149  end= strcpy(end, " IGNORE")+7;
150 
151  end+= sprintf(end, " INTO TABLE %s", tablename);
152 
153  if (! fields_terminated.empty() || ! enclosed.empty() || ! opt_enclosed.empty() || ! escaped.empty())
154  end= strcpy(end, " FIELDS")+7;
155  end= add_load_option(end, (char *)fields_terminated.c_str(), " TERMINATED BY");
156  end= add_load_option(end, (char *)enclosed.c_str(), " ENCLOSED BY");
157  end= add_load_option(end, (char *)opt_enclosed.c_str(),
158  " OPTIONALLY ENCLOSED BY");
159  end= add_load_option(end, (char *)escaped.c_str(), " ESCAPED BY");
160  end= add_load_option(end, (char *)lines_terminated.c_str(), " LINES TERMINATED BY");
161  if (opt_ignore_lines >= 0)
162  {
163  end= strcpy(end, " IGNORE ")+8;
164  ostringstream buffer;
165  buffer << opt_ignore_lines;
166  end= strcpy(end, buffer.str().c_str())+ buffer.str().size();
167  end= strcpy(end, " LINES")+6;
168  }
169  if (! opt_columns.empty())
170  {
171  end= strcpy(end, " (")+2;
172  end= strcpy(end, (char *)opt_columns.c_str()+opt_columns.length());
173  end= strcpy(end, ")")+1;
174  }
175  *end= '\0';
176 
177  if (drizzle_query_str(con, &result, sql_statement, &ret) == NULL ||
178  ret != DRIZZLE_RETURN_OK)
179  {
180  db_error(con, &result, ret, tablename);
181  return(1);
182  }
183  if (!silent)
184  {
185  if (strcmp(drizzle_result_info(&result), ""))
186  {
187  fprintf(stdout, "%s.%s: %s\n", current_db.c_str(), tablename,
188  drizzle_result_info(&result));
189  }
190  }
191  drizzle_result_free(&result);
192  return(0);
193 }
194 
195 
196 static drizzle_con_st *db_connect(const string host, const string database,
197  const string user, const string passwd)
198 {
199  drizzle_st *drizzle;
200  drizzle_con_st *con;
201  drizzle_return_t ret;
202 
203  if (verbose)
204  {
205  fprintf(stdout, "Connecting to %s, using protocol %s...\n", ! host.empty() ? host.c_str() : "localhost", opt_protocol.c_str());
206  }
207 
208  if ((drizzle= drizzle_create()) == NULL)
209  {
210  return 0;
211  }
212 
213  if (!(con= drizzle_con_add_tcp(drizzle,
214  host.c_str(), opt_drizzle_port,
215  user.c_str(), passwd.c_str(),
216  database.c_str(),
217  use_drizzle_protocol ? DRIZZLE_CON_EXPERIMENTAL : DRIZZLE_CON_MYSQL)))
218  {
219  return 0;
220  }
221 
222  if ((ret= drizzle_con_connect(con)) != DRIZZLE_RETURN_OK)
223  {
224  ignore_errors=0; /* NO RETURN FROM db_error */
225  db_error(con, NULL, ret, NULL);
226  }
227 
228  if (verbose)
229  fprintf(stdout, "Selecting database %s\n", database.c_str());
230 
231  return con;
232 }
233 
234 
235 
236 static void db_disconnect(const string host, drizzle_con_st *con)
237 {
238  if (verbose)
239  fprintf(stdout, "Disconnecting from %s\n", ! host.empty() ? host.c_str() : "localhost");
240  drizzle_free(drizzle_con_drizzle(con));
241 }
242 
243 
244 
245 static void safe_exit(int error, drizzle_con_st *con)
246 {
247  if (ignore_errors)
248  return;
249  if (con)
250  drizzle_free(drizzle_con_drizzle(con));
251  exit(error);
252 }
253 
254 
255 
256 static void db_error(drizzle_con_st *con, drizzle_result_st *result,
257  drizzle_return_t ret, char *table)
258 {
259  if (ret == DRIZZLE_RETURN_ERROR_CODE)
260  {
261  fprintf(stdout, "Error: %d, %s%s%s",
262  drizzle_result_error_code(result),
263  drizzle_result_error(result),
264  table ? ", when using table: " : "", table ? table : "");
265  drizzle_result_free(result);
266  }
267  else
268  {
269  fprintf(stdout, "Error: %d, %s%s%s", ret, drizzle_con_error(con),
270  table ? ", when using table: " : "", table ? table : "");
271  }
272 
273  safe_exit(1, con);
274 }
275 
276 
/*
  Copy a user supplied separator string into an SQL string literal body.

  Allows the user to specify field terminator strings like:
  "'", "\", "\\" (escaped backslash), "\t" (tab), "\n" (newline).
  This is done by doubling every ' that is not preceded by an odd number of
  backslashes, and by appending one backslash when the input ends in an odd
  number of backslashes, to avoid syntax errors from the SQL parser.

  to      destination; must have room for up to 2 * length + 1 bytes
  from    source bytes
  length  number of bytes to read from from

  Returns a pointer one past the last byte written.  The output is NOT
  NUL-terminated.
*/
static char *field_escape(char *to, const char *from, uint32_t length)
{
  const char *stop= from + length;
  uint32_t odd_backslashes= 0;

  while (from != stop)
  {
    const char current= *from++;

    *to++= current;
    if (current == '\\')
    {
      odd_backslashes^= 1; /* track whether the current run has odd length */
      continue;
    }

    if (current == '\'' && !odd_backslashes)
      *to++= current; /* an unescaped quote has to be duplicated */
    odd_backslashes= 0;
  }

  /* Complete a dangling backslash so it cannot escape the closing quote. */
  if (odd_backslashes)
    *to++= '\\';
  return to;
}

/*
  Append " <statement> <value>" for one LOAD DATA option at ptr.

  ptr        write position inside the statement buffer
  object     option value; NULL or an empty string means "not given" and
             nothing is written
  statement  clause keyword(s), e.g. " TERMINATED BY"

  Values starting with 0x / 0X are written verbatim as hex constants; any
  other value is written as a quoted string escaped by field_escape().

  Returns the new write position.  In the quoted case no terminating NUL is
  stored after the closing quote; the caller terminates the statement.
*/
static char *add_load_option(char *ptr, const char *object,
                             const char *statement)
{
  /* Options arrive as std::string::c_str(), so "unset" shows up as "". */
  if (object == NULL || object[0] == '\0')
    return ptr;

  /* Don't escape hex constants */
  if (object[0] == '0' && (object[1] == 'x' || object[1] == 'X'))
  {
    ptr+= sprintf(ptr, " %s %s", statement, object);
    return ptr;
  }

  /* char constant; escape */
  ptr+= sprintf(ptr, " %s '", statement);
  ptr= field_escape(ptr, object, (uint32_t) strlen(object));
  *ptr++= '\'';
  return ptr;
}
325 
326 void * worker_thread(void *arg)
327 {
328  int error;
329  char *raw_table_name= (char *)arg;
330  drizzle_con_st *con;
331 
332  if (!(con= db_connect(current_host,current_db,current_user,opt_password)))
333  {
334  return 0;
335  }
336 
337  /*
338  We are not currently catching the error here.
339  */
340  if ((error= write_to_table(raw_table_name, con)))
341  {
342  if (exitcode == 0)
343  {
344  exitcode= error;
345  }
346  }
347 
348  if (con)
349  {
350  db_disconnect(current_host, con);
351  }
352 
353  pthread_mutex_lock(&counter_mutex);
354  counter--;
355  pthread_cond_signal(&count_threshhold);
356  pthread_mutex_unlock(&counter_mutex);
357 
358  return 0;
359 }
360 
361 
362 int main(int argc, char **argv)
363 {
364 try
365 {
366  po::options_description commandline_options("Options used only in command line");
367  commandline_options.add_options()
368 
369  ("debug,#", po::value<string>(),
370  "Output debug log. Often this is 'd:t:o,filename'.")
371  ("delete,d", po::value<bool>(&opt_delete)->default_value(false)->zero_tokens(),
372  "First delete all rows from table.")
373  ("help,?", "Displays this help and exits.")
374  ("ignore,i", po::value<bool>(&ignore_unique)->default_value(false)->zero_tokens(),
375  "If duplicate unique key was found, keep old row.")
376  ("low-priority", po::value<bool>(&opt_low_priority)->default_value(false)->zero_tokens(),
377  "Use LOW_PRIORITY when updating the table.")
378  ("replace,r", po::value<bool>(&opt_replace)->default_value(false)->zero_tokens(),
379  "If duplicate unique key was found, replace old row.")
380  ("verbose,v", po::value<bool>(&verbose)->default_value(false)->zero_tokens(),
381  "Print info about the various stages.")
382  ("version,V", "Output version information and exit.")
383  ;
384 
385  po::options_description import_options("Options specific to the drizzleimport");
386  import_options.add_options()
387  ("columns,C", po::value<string>(&opt_columns)->default_value(""),
388  "Use only these columns to import the data to. Give the column names in a comma separated list. This is same as giving columns to LOAD DATA INFILE.")
389  ("fields-terminated-by", po::value<string>(&fields_terminated)->default_value(""),
390  "Fields in the textfile are terminated by ...")
391  ("fields-enclosed-by", po::value<string>(&enclosed)->default_value(""),
392  "Fields in the importfile are enclosed by ...")
393  ("fields-optionally-enclosed-by", po::value<string>(&opt_enclosed)->default_value(""),
394  "Fields in the i.file are opt. enclosed by ...")
395  ("fields-escaped-by", po::value<string>(&escaped)->default_value(""),
396  "Fields in the i.file are escaped by ...")
397  ("force,f", po::value<bool>(&ignore_errors)->default_value(false)->zero_tokens(),
398  "Continue even if we get an sql-error.")
399  ("ignore-lines", po::value<int64_t>(&opt_ignore_lines)->default_value(0),
400  "Ignore first n lines of data infile.")
401  ("lines-terminated-by", po::value<string>(&lines_terminated)->default_value(""),
402  "Lines in the i.file are terminated by ...")
403  ("local,L", po::value<bool>(&opt_local_file)->default_value(false)->zero_tokens(),
404  "Read all files through the client.")
405  ("silent,s", po::value<bool>(&silent)->default_value(false)->zero_tokens(),
406  "Be more silent.")
407  ("use-threads", po::value<uint32_t>(&opt_use_threads)->default_value(4),
408  "Load files in parallel. The argument is the number of threads to use for loading data (default is 4.")
409  ;
410 
411  po::options_description client_options("Options specific to the client");
412  client_options.add_options()
413  ("host,h", po::value<string>(&current_host)->default_value("localhost"),
414  "Connect to host.")
415  ("password,P", po::value<string>(&password),
416  "Password to use when connecting to server. If password is not given it's asked from the tty." )
417  ("port,p", po::value<uint32_t>(&opt_drizzle_port)->default_value(0),
418  "Port number to use for connection")
419  ("protocol", po::value<string>(&opt_protocol)->default_value("mysql"),
420  "The protocol of connection (mysql or drizzle).")
421  ("user,u", po::value<string>(&current_user)->default_value(UserDetect().getUser()),
422  "User for login if not current user.")
423  ;
424 
425  po::options_description long_options("Allowed Options");
426  long_options.add(commandline_options).add(import_options).add(client_options);
427 
428  std::string system_config_dir_import(SYSCONFDIR);
429  system_config_dir_import.append("/drizzle/drizzleimport.cnf");
430 
431  std::string system_config_dir_client(SYSCONFDIR);
432  system_config_dir_client.append("/drizzle/client.cnf");
433 
434  std::string user_config_dir((getenv("XDG_CONFIG_HOME")? getenv("XDG_CONFIG_HOME"):"~/.config"));
435 
436  if (user_config_dir.compare(0, 2, "~/") == 0)
437  {
438  char *homedir;
439  homedir= getenv("HOME");
440  if (homedir != NULL)
441  user_config_dir.replace(0, 1, homedir);
442  }
443 
444  po::variables_map vm;
445 
446  // Disable allow_guessing
447  int style = po::command_line_style::default_style & ~po::command_line_style::allow_guessing;
448 
449  po::store(po::command_line_parser(argc, argv).options(long_options).
450  style(style).extra_parser(parse_password_arg).run(), vm);
451 
452  std::string user_config_dir_import(user_config_dir);
453  user_config_dir_import.append("/drizzle/drizzleimport.cnf");
454 
455  std::string user_config_dir_client(user_config_dir);
456  user_config_dir_client.append("/drizzle/client.cnf");
457 
458  ifstream user_import_ifs(user_config_dir_import.c_str());
459  po::store(parse_config_file(user_import_ifs, import_options), vm);
460 
461  ifstream user_client_ifs(user_config_dir_client.c_str());
462  po::store(parse_config_file(user_client_ifs, client_options), vm);
463 
464  ifstream system_import_ifs(system_config_dir_import.c_str());
465  store(parse_config_file(system_import_ifs, import_options), vm);
466 
467  ifstream system_client_ifs(system_config_dir_client.c_str());
468  po::store(parse_config_file(system_client_ifs, client_options), vm);
469 
470  po::notify(vm);
471  if (vm.count("protocol"))
472  {
473  boost::to_lower(opt_protocol);
474  if (not opt_protocol.compare("mysql"))
475  use_drizzle_protocol=false;
476  else if (not opt_protocol.compare("drizzle"))
477  use_drizzle_protocol=true;
478  else
479  {
480  cout << _("Error: Unknown protocol") << " '" << opt_protocol << "'" << endl;
481  exit(-1);
482  }
483  }
484 
485  if (vm.count("port"))
486  {
487 
488  /* If the port number is > 65535 it is not a valid port
489  This also helps with potential data loss casting unsigned long to a
490  uint32_t. */
491  if (opt_drizzle_port > 65535)
492  {
493  fprintf(stderr, _("Value supplied for port is not valid.\n"));
494  exit(EXIT_ARGUMENT_INVALID);
495  }
496  }
497 
498  if( vm.count("password") )
499  {
500  if (!opt_password.empty())
501  opt_password.erase();
502  if (password == PASSWORD_SENTINEL)
503  {
504  opt_password= "";
505  }
506  else
507  {
508  opt_password= password;
509  tty_password= false;
510  }
511  }
512  else
513  {
514  tty_password= true;
515  }
516 
517 
518  if (vm.count("version"))
519  {
520  printf("%s Ver %s Distrib %s, for %s-%s (%s)\n", program_name,
521  IMPORT_VERSION, drizzle_version(),HOST_VENDOR,HOST_OS,HOST_CPU);
522  }
523 
524  if (vm.count("help") || argc < 2)
525  {
526  printf("%s Ver %s Distrib %s, for %s-%s (%s)\n", program_name,
527  IMPORT_VERSION, drizzle_version(),HOST_VENDOR,HOST_OS,HOST_CPU);
528  puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
529  printf("\
530  Loads tables from text files in various formats. The base name of the\n\
531  text file must be the name of the table that should be used.\n\
532  If one uses sockets to connect to the Drizzle server, the server will open and\n\
533  read the text file directly. In other cases the client will open the text\n\
534  file. The SQL command 'LOAD DATA INFILE' is used to import the rows.\n");
535 
536  printf("\nUsage: %s [OPTIONS] database textfile...", program_name);
537  cout<<long_options;
538  exit(0);
539  }
540 
541 
542  if (get_options())
543  {
544  return(1);
545  }
546 
547  current_db= (*argv)++;
548  argc--;
549 
550  if (opt_use_threads)
551  {
552  pthread_t mainthread; /* Thread descriptor */
553  pthread_attr_t attr; /* Thread attributes */
554  pthread_attr_init(&attr);
555  pthread_attr_setdetachstate(&attr,
556  PTHREAD_CREATE_DETACHED);
557 
558  pthread_mutex_init(&counter_mutex, NULL);
559  pthread_cond_init(&count_threshhold, NULL);
560 
561  for (counter= 0; *argv != NULL; argv++) /* Loop through tables */
562  {
563  pthread_mutex_lock(&counter_mutex);
564  while (counter == opt_use_threads)
565  {
566  struct timespec abstime;
567 
568  set_timespec(abstime, 3);
569  pthread_cond_timedwait(&count_threshhold, &counter_mutex, &abstime);
570  }
571  /* Before exiting the lock we set ourselves up for the next thread */
572  counter++;
573  pthread_mutex_unlock(&counter_mutex);
574  /* now create the thread */
575  if (pthread_create(&mainthread, &attr, worker_thread,
576  (void *)*argv) != 0)
577  {
578  pthread_mutex_lock(&counter_mutex);
579  counter--;
580  pthread_mutex_unlock(&counter_mutex);
581  fprintf(stderr,"%s: Could not create thread\n", program_name);
582  }
583  }
584 
585  /*
586  We loop until we know that all children have cleaned up.
587  */
588  pthread_mutex_lock(&counter_mutex);
589  while (counter)
590  {
591  struct timespec abstime;
592 
593  set_timespec(abstime, 3);
594  pthread_cond_timedwait(&count_threshhold, &counter_mutex, &abstime);
595  }
596  pthread_mutex_unlock(&counter_mutex);
597  pthread_mutex_destroy(&counter_mutex);
598  pthread_cond_destroy(&count_threshhold);
599  pthread_attr_destroy(&attr);
600  }
601  else
602  {
603  drizzle_con_st *con;
604 
605  if (!(con= db_connect(current_host,current_db,current_user,opt_password)))
606  {
607  return(1);
608  }
609 
610  for (; *argv != NULL; argv++)
611  if (int error= write_to_table(*argv, con))
612  if (exitcode == 0)
613  exitcode= error;
614  db_disconnect(current_host, con);
615  }
616  opt_password.empty();
617 }
618  catch(exception &err)
619  {
620  cerr<<err.what()<<endl;
621  }
622  return(exitcode);
623 }